diff --git "a/EgoQA-GeLM-7B/trainer_state.json" "b/EgoQA-GeLM-7B/trainer_state.json" new file mode 100644--- /dev/null +++ "b/EgoQA-GeLM-7B/trainer_state.json" @@ -0,0 +1,9808 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.0, + "eval_steps": 500, + "global_step": 1630, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 4.0816326530612243e-07, + "loss": 11.0156, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 8.163265306122449e-07, + "loss": 10.5312, + "step": 2 + }, + { + "epoch": 0.02, + "learning_rate": 1.2244897959183673e-06, + "loss": 10.9531, + "step": 3 + }, + { + "epoch": 0.02, + "learning_rate": 1.6326530612244897e-06, + "loss": 10.4062, + "step": 4 + }, + { + "epoch": 0.03, + "learning_rate": 2.0408163265306125e-06, + "loss": 10.5156, + "step": 5 + }, + { + "epoch": 0.04, + "learning_rate": 2.4489795918367347e-06, + "loss": 10.5156, + "step": 6 + }, + { + "epoch": 0.04, + "learning_rate": 2.8571428571428573e-06, + "loss": 9.8281, + "step": 7 + }, + { + "epoch": 0.05, + "learning_rate": 3.2653061224489794e-06, + "loss": 8.9531, + "step": 8 + }, + { + "epoch": 0.06, + "learning_rate": 3.6734693877551024e-06, + "loss": 9.0156, + "step": 9 + }, + { + "epoch": 0.06, + "learning_rate": 4.081632653061225e-06, + "loss": 8.9062, + "step": 10 + }, + { + "epoch": 0.07, + "learning_rate": 4.489795918367348e-06, + "loss": 8.0781, + "step": 11 + }, + { + "epoch": 0.07, + "learning_rate": 4.897959183673469e-06, + "loss": 7.625, + "step": 12 + }, + { + "epoch": 0.08, + "learning_rate": 5.306122448979593e-06, + "loss": 7.7188, + "step": 13 + }, + { + "epoch": 0.09, + "learning_rate": 5.7142857142857145e-06, + "loss": 7.0391, + "step": 14 + }, + { + "epoch": 0.09, + "learning_rate": 6.122448979591837e-06, + "loss": 6.6875, + "step": 15 + }, + { + "epoch": 0.1, + "learning_rate": 6.530612244897959e-06, + "loss": 6.4922, + "step": 16 + }, + { + "epoch": 0.1, + "learning_rate": 6.938775510204082e-06, + "loss": 6.1953, + "step": 17 + }, + { + "epoch": 0.11, + "learning_rate": 7.346938775510205e-06, + "loss": 6.2578, + "step": 18 + }, + { + "epoch": 0.12, + "learning_rate": 7.755102040816327e-06, + "loss": 5.8906, + "step": 19 + }, + { + "epoch": 0.12, + "learning_rate": 8.16326530612245e-06, + "loss": 5.7656, + "step": 20 + }, + { + "epoch": 0.13, + "learning_rate": 8.571428571428571e-06, + "loss": 5.4844, + "step": 21 + }, + { + "epoch": 0.13, + "learning_rate": 8.979591836734695e-06, + "loss": 5.4922, + "step": 22 + }, + { + "epoch": 0.14, + "learning_rate": 9.387755102040818e-06, + "loss": 5.3594, + "step": 23 + }, + { + "epoch": 0.15, + "learning_rate": 9.795918367346939e-06, + "loss": 5.0625, + "step": 24 + }, + { + "epoch": 0.15, + "learning_rate": 1.0204081632653063e-05, + "loss": 4.9219, + "step": 25 + }, + { + "epoch": 0.16, + "learning_rate": 1.0612244897959186e-05, + "loss": 4.9609, + "step": 26 + }, + { + "epoch": 0.17, + "learning_rate": 1.1020408163265306e-05, + "loss": 4.6172, + "step": 27 + }, + { + "epoch": 0.17, + "learning_rate": 1.1428571428571429e-05, + "loss": 4.7578, + "step": 28 + }, + { + "epoch": 0.18, + "learning_rate": 1.1836734693877552e-05, + "loss": 4.4219, + "step": 29 + }, + { + "epoch": 0.18, + "learning_rate": 1.2244897959183674e-05, + "loss": 4.4688, + "step": 30 + }, + { + "epoch": 0.19, + "learning_rate": 1.2653061224489798e-05, + "loss": 4.8281, + "step": 31 + }, + { + "epoch": 0.2, + "learning_rate": 1.3061224489795918e-05, + "loss": 4.6484, + "step": 32 + }, + { + "epoch": 0.2, + "learning_rate": 1.3469387755102042e-05, + "loss": 4.6172, + "step": 33 + }, + { + "epoch": 0.21, + "learning_rate": 1.3877551020408165e-05, + "loss": 4.5938, + "step": 34 + }, + { + "epoch": 0.21, + "learning_rate": 1.4285714285714287e-05, + "loss": 4.5156, + "step": 35 + }, + { + "epoch": 0.22, + "learning_rate": 1.469387755102041e-05, + "loss": 4.5938, + "step": 36 + }, + { + "epoch": 0.23, + "learning_rate": 1.510204081632653e-05, + "loss": 4.2109, + "step": 37 + }, + { + "epoch": 0.23, + "learning_rate": 1.5510204081632655e-05, + "loss": 4.1094, + "step": 38 + }, + { + "epoch": 0.24, + "learning_rate": 1.5918367346938776e-05, + "loss": 4.0742, + "step": 39 + }, + { + "epoch": 0.25, + "learning_rate": 1.63265306122449e-05, + "loss": 4.1406, + "step": 40 + }, + { + "epoch": 0.25, + "learning_rate": 1.673469387755102e-05, + "loss": 3.9453, + "step": 41 + }, + { + "epoch": 0.26, + "learning_rate": 1.7142857142857142e-05, + "loss": 3.7773, + "step": 42 + }, + { + "epoch": 0.26, + "learning_rate": 1.7551020408163266e-05, + "loss": 4.0781, + "step": 43 + }, + { + "epoch": 0.27, + "learning_rate": 1.795918367346939e-05, + "loss": 4.3906, + "step": 44 + }, + { + "epoch": 0.28, + "learning_rate": 1.836734693877551e-05, + "loss": 4.2031, + "step": 45 + }, + { + "epoch": 0.28, + "learning_rate": 1.8775510204081636e-05, + "loss": 4.0469, + "step": 46 + }, + { + "epoch": 0.29, + "learning_rate": 1.9183673469387756e-05, + "loss": 4.3359, + "step": 47 + }, + { + "epoch": 0.29, + "learning_rate": 1.9591836734693877e-05, + "loss": 3.9688, + "step": 48 + }, + { + "epoch": 0.3, + "learning_rate": 2e-05, + "loss": 4.1289, + "step": 49 + }, + { + "epoch": 0.31, + "learning_rate": 1.9999980257330883e-05, + "loss": 4.2734, + "step": 50 + }, + { + "epoch": 0.31, + "learning_rate": 1.9999921029401478e-05, + "loss": 4.2344, + "step": 51 + }, + { + "epoch": 0.32, + "learning_rate": 1.9999822316445652e-05, + "loss": 4.1797, + "step": 52 + }, + { + "epoch": 0.33, + "learning_rate": 1.9999684118853177e-05, + "loss": 4.0859, + "step": 53 + }, + { + "epoch": 0.33, + "learning_rate": 1.9999506437169723e-05, + "loss": 4.082, + "step": 54 + }, + { + "epoch": 0.34, + "learning_rate": 1.9999289272096886e-05, + "loss": 4.1875, + "step": 55 + }, + { + "epoch": 0.34, + "learning_rate": 1.9999032624492144e-05, + "loss": 3.9062, + "step": 56 + }, + { + "epoch": 0.35, + "learning_rate": 1.999873649536887e-05, + "loss": 4.0664, + "step": 57 + }, + { + "epoch": 0.36, + "learning_rate": 1.9998400885896355e-05, + "loss": 3.9922, + "step": 58 + }, + { + "epoch": 0.36, + "learning_rate": 1.9998025797399753e-05, + "loss": 4.0312, + "step": 59 + }, + { + "epoch": 0.37, + "learning_rate": 1.9997611231360117e-05, + "loss": 3.9297, + "step": 60 + }, + { + "epoch": 0.37, + "learning_rate": 1.9997157189414373e-05, + "loss": 4.0781, + "step": 61 + }, + { + "epoch": 0.38, + "learning_rate": 1.9996663673355326e-05, + "loss": 3.8125, + "step": 62 + }, + { + "epoch": 0.39, + "learning_rate": 1.9996130685131637e-05, + "loss": 3.9375, + "step": 63 + }, + { + "epoch": 0.39, + "learning_rate": 1.999555822684783e-05, + "loss": 4.1602, + "step": 64 + }, + { + "epoch": 0.4, + "learning_rate": 1.9994946300764276e-05, + "loss": 3.9336, + "step": 65 + }, + { + "epoch": 0.4, + "learning_rate": 1.999429490929718e-05, + "loss": 4.207, + "step": 66 + }, + { + "epoch": 0.41, + "learning_rate": 1.999360405501859e-05, + "loss": 3.7422, + "step": 67 + }, + { + "epoch": 0.42, + "learning_rate": 1.9992873740656372e-05, + "loss": 3.9414, + "step": 68 + }, + { + "epoch": 0.42, + "learning_rate": 1.9992103969094182e-05, + "loss": 3.8711, + "step": 69 + }, + { + "epoch": 0.43, + "learning_rate": 1.99912947433715e-05, + "loss": 4.25, + "step": 70 + }, + { + "epoch": 0.44, + "learning_rate": 1.999044606668358e-05, + "loss": 3.8203, + "step": 71 + }, + { + "epoch": 0.44, + "learning_rate": 1.998955794238145e-05, + "loss": 3.9688, + "step": 72 + }, + { + "epoch": 0.45, + "learning_rate": 1.9988630373971896e-05, + "loss": 3.9414, + "step": 73 + }, + { + "epoch": 0.45, + "learning_rate": 1.9987663365117456e-05, + "loss": 3.5312, + "step": 74 + }, + { + "epoch": 0.46, + "learning_rate": 1.99866569196364e-05, + "loss": 3.8789, + "step": 75 + }, + { + "epoch": 0.47, + "learning_rate": 1.9985611041502704e-05, + "loss": 3.9062, + "step": 76 + }, + { + "epoch": 0.47, + "learning_rate": 1.9984525734846056e-05, + "loss": 3.75, + "step": 77 + }, + { + "epoch": 0.48, + "learning_rate": 1.998340100395183e-05, + "loss": 3.8984, + "step": 78 + }, + { + "epoch": 0.48, + "learning_rate": 1.9982236853261067e-05, + "loss": 4.0781, + "step": 79 + }, + { + "epoch": 0.49, + "learning_rate": 1.9981033287370443e-05, + "loss": 3.8672, + "step": 80 + }, + { + "epoch": 0.5, + "learning_rate": 1.9979790311032288e-05, + "loss": 3.7461, + "step": 81 + }, + { + "epoch": 0.5, + "learning_rate": 1.9978507929154534e-05, + "loss": 3.6602, + "step": 82 + }, + { + "epoch": 0.51, + "learning_rate": 1.9977186146800707e-05, + "loss": 3.8555, + "step": 83 + }, + { + "epoch": 0.52, + "learning_rate": 1.9975824969189913e-05, + "loss": 3.8086, + "step": 84 + }, + { + "epoch": 0.52, + "learning_rate": 1.997442440169681e-05, + "loss": 3.9727, + "step": 85 + }, + { + "epoch": 0.53, + "learning_rate": 1.997298444985158e-05, + "loss": 3.6172, + "step": 86 + }, + { + "epoch": 0.53, + "learning_rate": 1.9971505119339923e-05, + "loss": 3.8359, + "step": 87 + }, + { + "epoch": 0.54, + "learning_rate": 1.9969986416003026e-05, + "loss": 3.8594, + "step": 88 + }, + { + "epoch": 0.55, + "learning_rate": 1.9968428345837542e-05, + "loss": 3.7227, + "step": 89 + }, + { + "epoch": 0.55, + "learning_rate": 1.996683091499556e-05, + "loss": 4.125, + "step": 90 + }, + { + "epoch": 0.56, + "learning_rate": 1.9965194129784597e-05, + "loss": 3.832, + "step": 91 + }, + { + "epoch": 0.56, + "learning_rate": 1.9963517996667548e-05, + "loss": 4.0, + "step": 92 + }, + { + "epoch": 0.57, + "learning_rate": 1.9961802522262685e-05, + "loss": 4.0703, + "step": 93 + }, + { + "epoch": 0.58, + "learning_rate": 1.996004771334361e-05, + "loss": 3.7461, + "step": 94 + }, + { + "epoch": 0.58, + "learning_rate": 1.9958253576839256e-05, + "loss": 3.9727, + "step": 95 + }, + { + "epoch": 0.59, + "learning_rate": 1.9956420119833826e-05, + "loss": 4.0664, + "step": 96 + }, + { + "epoch": 0.6, + "learning_rate": 1.9954547349566783e-05, + "loss": 4.2539, + "step": 97 + }, + { + "epoch": 0.6, + "learning_rate": 1.9952635273432835e-05, + "loss": 4.0156, + "step": 98 + }, + { + "epoch": 0.61, + "learning_rate": 1.9950683898981866e-05, + "loss": 4.1406, + "step": 99 + }, + { + "epoch": 0.61, + "learning_rate": 1.994869323391895e-05, + "loss": 4.1523, + "step": 100 + }, + { + "epoch": 0.62, + "learning_rate": 1.9946663286104303e-05, + "loss": 3.9023, + "step": 101 + }, + { + "epoch": 0.63, + "learning_rate": 1.994459406355323e-05, + "loss": 3.8086, + "step": 102 + }, + { + "epoch": 0.63, + "learning_rate": 1.994248557443613e-05, + "loss": 4.0391, + "step": 103 + }, + { + "epoch": 0.64, + "learning_rate": 1.9940337827078448e-05, + "loss": 3.9453, + "step": 104 + }, + { + "epoch": 0.64, + "learning_rate": 1.9938150829960634e-05, + "loss": 4.0039, + "step": 105 + }, + { + "epoch": 0.65, + "learning_rate": 1.993592459171812e-05, + "loss": 3.9883, + "step": 106 + }, + { + "epoch": 0.66, + "learning_rate": 1.9933659121141283e-05, + "loss": 3.6758, + "step": 107 + }, + { + "epoch": 0.66, + "learning_rate": 1.993135442717541e-05, + "loss": 3.793, + "step": 108 + }, + { + "epoch": 0.67, + "learning_rate": 1.9929010518920667e-05, + "loss": 3.7383, + "step": 109 + }, + { + "epoch": 0.67, + "learning_rate": 1.9926627405632048e-05, + "loss": 3.7227, + "step": 110 + }, + { + "epoch": 0.68, + "learning_rate": 1.992420509671936e-05, + "loss": 3.9023, + "step": 111 + }, + { + "epoch": 0.69, + "learning_rate": 1.992174360174717e-05, + "loss": 4.0078, + "step": 112 + }, + { + "epoch": 0.69, + "learning_rate": 1.991924293043478e-05, + "loss": 4.2109, + "step": 113 + }, + { + "epoch": 0.7, + "learning_rate": 1.991670309265617e-05, + "loss": 3.7461, + "step": 114 + }, + { + "epoch": 0.71, + "learning_rate": 1.9914124098439976e-05, + "loss": 4.0039, + "step": 115 + }, + { + "epoch": 0.71, + "learning_rate": 1.9911505957969443e-05, + "loss": 3.8867, + "step": 116 + }, + { + "epoch": 0.72, + "learning_rate": 1.990884868158239e-05, + "loss": 3.9883, + "step": 117 + }, + { + "epoch": 0.72, + "learning_rate": 1.9906152279771162e-05, + "loss": 3.8359, + "step": 118 + }, + { + "epoch": 0.73, + "learning_rate": 1.990341676318259e-05, + "loss": 3.6719, + "step": 119 + }, + { + "epoch": 0.74, + "learning_rate": 1.9900642142617958e-05, + "loss": 3.5898, + "step": 120 + }, + { + "epoch": 0.74, + "learning_rate": 1.9897828429032946e-05, + "loss": 3.9922, + "step": 121 + }, + { + "epoch": 0.75, + "learning_rate": 1.98949756335376e-05, + "loss": 3.8711, + "step": 122 + }, + { + "epoch": 0.75, + "learning_rate": 1.9892083767396274e-05, + "loss": 3.6797, + "step": 123 + }, + { + "epoch": 0.76, + "learning_rate": 1.9889152842027607e-05, + "loss": 4.0078, + "step": 124 + }, + { + "epoch": 0.77, + "learning_rate": 1.9886182869004447e-05, + "loss": 3.8164, + "step": 125 + }, + { + "epoch": 0.77, + "learning_rate": 1.9883173860053845e-05, + "loss": 3.6953, + "step": 126 + }, + { + "epoch": 0.78, + "learning_rate": 1.9880125827056967e-05, + "loss": 3.7344, + "step": 127 + }, + { + "epoch": 0.79, + "learning_rate": 1.9877038782049074e-05, + "loss": 3.6562, + "step": 128 + }, + { + "epoch": 0.79, + "learning_rate": 1.9873912737219468e-05, + "loss": 3.5625, + "step": 129 + }, + { + "epoch": 0.8, + "learning_rate": 1.987074770491145e-05, + "loss": 4.0859, + "step": 130 + }, + { + "epoch": 0.8, + "learning_rate": 1.9867543697622248e-05, + "loss": 3.7344, + "step": 131 + }, + { + "epoch": 0.81, + "learning_rate": 1.9864300728002997e-05, + "loss": 3.9453, + "step": 132 + }, + { + "epoch": 0.82, + "learning_rate": 1.986101880885867e-05, + "loss": 4.1211, + "step": 133 + }, + { + "epoch": 0.82, + "learning_rate": 1.985769795314804e-05, + "loss": 3.7344, + "step": 134 + }, + { + "epoch": 0.83, + "learning_rate": 1.9854338173983615e-05, + "loss": 3.6875, + "step": 135 + }, + { + "epoch": 0.83, + "learning_rate": 1.9850939484631598e-05, + "loss": 3.8125, + "step": 136 + }, + { + "epoch": 0.84, + "learning_rate": 1.9847501898511824e-05, + "loss": 3.707, + "step": 137 + }, + { + "epoch": 0.85, + "learning_rate": 1.9844025429197727e-05, + "loss": 4.0781, + "step": 138 + }, + { + "epoch": 0.85, + "learning_rate": 1.984051009041626e-05, + "loss": 3.8281, + "step": 139 + }, + { + "epoch": 0.86, + "learning_rate": 1.983695589604785e-05, + "loss": 4.0391, + "step": 140 + }, + { + "epoch": 0.87, + "learning_rate": 1.9833362860126364e-05, + "loss": 3.6719, + "step": 141 + }, + { + "epoch": 0.87, + "learning_rate": 1.982973099683902e-05, + "loss": 4.0898, + "step": 142 + }, + { + "epoch": 0.88, + "learning_rate": 1.9826060320526355e-05, + "loss": 3.8281, + "step": 143 + }, + { + "epoch": 0.88, + "learning_rate": 1.982235084568216e-05, + "loss": 3.9219, + "step": 144 + }, + { + "epoch": 0.89, + "learning_rate": 1.9818602586953414e-05, + "loss": 3.9961, + "step": 145 + }, + { + "epoch": 0.9, + "learning_rate": 1.9814815559140258e-05, + "loss": 3.8125, + "step": 146 + }, + { + "epoch": 0.9, + "learning_rate": 1.9810989777195884e-05, + "loss": 3.8164, + "step": 147 + }, + { + "epoch": 0.91, + "learning_rate": 1.9807125256226532e-05, + "loss": 4.1094, + "step": 148 + }, + { + "epoch": 0.91, + "learning_rate": 1.9803222011491385e-05, + "loss": 3.4805, + "step": 149 + }, + { + "epoch": 0.92, + "learning_rate": 1.979928005840255e-05, + "loss": 3.7305, + "step": 150 + }, + { + "epoch": 0.93, + "learning_rate": 1.9795299412524948e-05, + "loss": 3.793, + "step": 151 + }, + { + "epoch": 0.93, + "learning_rate": 1.9791280089576302e-05, + "loss": 4.0312, + "step": 152 + }, + { + "epoch": 0.94, + "learning_rate": 1.978722210542704e-05, + "loss": 3.6953, + "step": 153 + }, + { + "epoch": 0.94, + "learning_rate": 1.9783125476100254e-05, + "loss": 3.7891, + "step": 154 + }, + { + "epoch": 0.95, + "learning_rate": 1.977899021777162e-05, + "loss": 3.6523, + "step": 155 + }, + { + "epoch": 0.96, + "learning_rate": 1.977481634676935e-05, + "loss": 3.9414, + "step": 156 + }, + { + "epoch": 0.96, + "learning_rate": 1.9770603879574108e-05, + "loss": 3.9609, + "step": 157 + }, + { + "epoch": 0.97, + "learning_rate": 1.9766352832818972e-05, + "loss": 3.4336, + "step": 158 + }, + { + "epoch": 0.98, + "learning_rate": 1.9762063223289334e-05, + "loss": 3.6484, + "step": 159 + }, + { + "epoch": 0.98, + "learning_rate": 1.975773506792287e-05, + "loss": 3.8281, + "step": 160 + }, + { + "epoch": 0.99, + "learning_rate": 1.9753368383809445e-05, + "loss": 3.7578, + "step": 161 + }, + { + "epoch": 0.99, + "learning_rate": 1.974896318819106e-05, + "loss": 3.8555, + "step": 162 + }, + { + "epoch": 1.0, + "learning_rate": 1.974451949846177e-05, + "loss": 3.7617, + "step": 163 + }, + { + "epoch": 1.01, + "learning_rate": 1.974003733216765e-05, + "loss": 3.5039, + "step": 164 + }, + { + "epoch": 1.01, + "learning_rate": 1.9735516707006676e-05, + "loss": 3.7344, + "step": 165 + }, + { + "epoch": 1.02, + "learning_rate": 1.973095764082869e-05, + "loss": 3.6172, + "step": 166 + }, + { + "epoch": 1.02, + "learning_rate": 1.972636015163532e-05, + "loss": 3.7734, + "step": 167 + }, + { + "epoch": 1.03, + "learning_rate": 1.9721724257579907e-05, + "loss": 3.543, + "step": 168 + }, + { + "epoch": 1.04, + "learning_rate": 1.9717049976967437e-05, + "loss": 3.7031, + "step": 169 + }, + { + "epoch": 1.04, + "learning_rate": 1.971233732825446e-05, + "loss": 3.543, + "step": 170 + }, + { + "epoch": 1.05, + "learning_rate": 1.9707586330049037e-05, + "loss": 3.6836, + "step": 171 + }, + { + "epoch": 1.06, + "learning_rate": 1.9702797001110642e-05, + "loss": 3.2969, + "step": 172 + }, + { + "epoch": 1.06, + "learning_rate": 1.9697969360350098e-05, + "loss": 3.3789, + "step": 173 + }, + { + "epoch": 1.07, + "learning_rate": 1.969310342682951e-05, + "loss": 3.5625, + "step": 174 + }, + { + "epoch": 1.07, + "learning_rate": 1.9688199219762183e-05, + "loss": 3.9297, + "step": 175 + }, + { + "epoch": 1.08, + "learning_rate": 1.9683256758512544e-05, + "loss": 3.6094, + "step": 176 + }, + { + "epoch": 1.09, + "learning_rate": 1.967827606259607e-05, + "loss": 3.5547, + "step": 177 + }, + { + "epoch": 1.09, + "learning_rate": 1.96732571516792e-05, + "loss": 3.5742, + "step": 178 + }, + { + "epoch": 1.1, + "learning_rate": 1.9668200045579283e-05, + "loss": 3.3047, + "step": 179 + }, + { + "epoch": 1.1, + "learning_rate": 1.9663104764264468e-05, + "loss": 3.5117, + "step": 180 + }, + { + "epoch": 1.11, + "learning_rate": 1.9657971327853644e-05, + "loss": 3.4805, + "step": 181 + }, + { + "epoch": 1.12, + "learning_rate": 1.9652799756616364e-05, + "loss": 3.4453, + "step": 182 + }, + { + "epoch": 1.12, + "learning_rate": 1.964759007097275e-05, + "loss": 3.5195, + "step": 183 + }, + { + "epoch": 1.13, + "learning_rate": 1.964234229149342e-05, + "loss": 3.375, + "step": 184 + }, + { + "epoch": 1.13, + "learning_rate": 1.963705643889941e-05, + "loss": 3.4648, + "step": 185 + }, + { + "epoch": 1.14, + "learning_rate": 1.9631732534062088e-05, + "loss": 3.6719, + "step": 186 + }, + { + "epoch": 1.15, + "learning_rate": 1.962637059800307e-05, + "loss": 3.582, + "step": 187 + }, + { + "epoch": 1.15, + "learning_rate": 1.9620970651894146e-05, + "loss": 3.8086, + "step": 188 + }, + { + "epoch": 1.16, + "learning_rate": 1.9615532717057185e-05, + "loss": 3.5234, + "step": 189 + }, + { + "epoch": 1.17, + "learning_rate": 1.9610056814964053e-05, + "loss": 3.6016, + "step": 190 + }, + { + "epoch": 1.17, + "learning_rate": 1.9604542967236535e-05, + "loss": 3.6172, + "step": 191 + }, + { + "epoch": 1.18, + "learning_rate": 1.9598991195646252e-05, + "loss": 3.3477, + "step": 192 + }, + { + "epoch": 1.18, + "learning_rate": 1.959340152211455e-05, + "loss": 3.3125, + "step": 193 + }, + { + "epoch": 1.19, + "learning_rate": 1.9587773968712458e-05, + "loss": 3.7891, + "step": 194 + }, + { + "epoch": 1.2, + "learning_rate": 1.958210855766055e-05, + "loss": 3.8008, + "step": 195 + }, + { + "epoch": 1.2, + "learning_rate": 1.95764053113289e-05, + "loss": 3.5156, + "step": 196 + }, + { + "epoch": 1.21, + "learning_rate": 1.9570664252236966e-05, + "loss": 3.9531, + "step": 197 + }, + { + "epoch": 1.21, + "learning_rate": 1.956488540305351e-05, + "loss": 3.3164, + "step": 198 + }, + { + "epoch": 1.22, + "learning_rate": 1.9559068786596526e-05, + "loss": 3.6797, + "step": 199 + }, + { + "epoch": 1.23, + "learning_rate": 1.9553214425833108e-05, + "loss": 3.4844, + "step": 200 + }, + { + "epoch": 1.23, + "learning_rate": 1.9547322343879397e-05, + "loss": 3.6641, + "step": 201 + }, + { + "epoch": 1.24, + "learning_rate": 1.954139256400049e-05, + "loss": 3.5195, + "step": 202 + }, + { + "epoch": 1.25, + "learning_rate": 1.9535425109610317e-05, + "loss": 3.7773, + "step": 203 + }, + { + "epoch": 1.25, + "learning_rate": 1.9529420004271568e-05, + "loss": 3.3711, + "step": 204 + }, + { + "epoch": 1.26, + "learning_rate": 1.952337727169561e-05, + "loss": 3.8828, + "step": 205 + }, + { + "epoch": 1.26, + "learning_rate": 1.951729693574238e-05, + "loss": 3.5781, + "step": 206 + }, + { + "epoch": 1.27, + "learning_rate": 1.9511179020420284e-05, + "loss": 3.457, + "step": 207 + }, + { + "epoch": 1.28, + "learning_rate": 1.950502354988612e-05, + "loss": 3.5312, + "step": 208 + }, + { + "epoch": 1.28, + "learning_rate": 1.9498830548444972e-05, + "loss": 3.6367, + "step": 209 + }, + { + "epoch": 1.29, + "learning_rate": 1.9492600040550114e-05, + "loss": 3.5625, + "step": 210 + }, + { + "epoch": 1.29, + "learning_rate": 1.948633205080292e-05, + "loss": 3.5703, + "step": 211 + }, + { + "epoch": 1.3, + "learning_rate": 1.948002660395276e-05, + "loss": 3.7461, + "step": 212 + }, + { + "epoch": 1.31, + "learning_rate": 1.9473683724896898e-05, + "loss": 3.7148, + "step": 213 + }, + { + "epoch": 1.31, + "learning_rate": 1.9467303438680414e-05, + "loss": 3.5039, + "step": 214 + }, + { + "epoch": 1.32, + "learning_rate": 1.946088577049608e-05, + "loss": 3.5273, + "step": 215 + }, + { + "epoch": 1.33, + "learning_rate": 1.9454430745684276e-05, + "loss": 3.7188, + "step": 216 + }, + { + "epoch": 1.33, + "learning_rate": 1.944793838973289e-05, + "loss": 3.5586, + "step": 217 + }, + { + "epoch": 1.34, + "learning_rate": 1.94414087282772e-05, + "loss": 3.6602, + "step": 218 + }, + { + "epoch": 1.34, + "learning_rate": 1.9434841787099804e-05, + "loss": 3.3633, + "step": 219 + }, + { + "epoch": 1.35, + "learning_rate": 1.9428237592130487e-05, + "loss": 3.2969, + "step": 220 + }, + { + "epoch": 1.36, + "learning_rate": 1.9421596169446135e-05, + "loss": 3.7031, + "step": 221 + }, + { + "epoch": 1.36, + "learning_rate": 1.941491754527064e-05, + "loss": 3.4375, + "step": 222 + }, + { + "epoch": 1.37, + "learning_rate": 1.940820174597476e-05, + "loss": 3.6016, + "step": 223 + }, + { + "epoch": 1.37, + "learning_rate": 1.9401448798076064e-05, + "loss": 3.6406, + "step": 224 + }, + { + "epoch": 1.38, + "learning_rate": 1.9394658728238797e-05, + "loss": 3.5273, + "step": 225 + }, + { + "epoch": 1.39, + "learning_rate": 1.9387831563273775e-05, + "loss": 3.4336, + "step": 226 + }, + { + "epoch": 1.39, + "learning_rate": 1.938096733013829e-05, + "loss": 3.4141, + "step": 227 + }, + { + "epoch": 1.4, + "learning_rate": 1.9374066055936004e-05, + "loss": 3.6797, + "step": 228 + }, + { + "epoch": 1.4, + "learning_rate": 1.9367127767916828e-05, + "loss": 3.6953, + "step": 229 + }, + { + "epoch": 1.41, + "learning_rate": 1.9360152493476828e-05, + "loss": 3.6797, + "step": 230 + }, + { + "epoch": 1.42, + "learning_rate": 1.9353140260158108e-05, + "loss": 3.5938, + "step": 231 + }, + { + "epoch": 1.42, + "learning_rate": 1.9346091095648712e-05, + "loss": 3.9492, + "step": 232 + }, + { + "epoch": 1.43, + "learning_rate": 1.93390050277825e-05, + "loss": 3.6172, + "step": 233 + }, + { + "epoch": 1.44, + "learning_rate": 1.9331882084539056e-05, + "loss": 3.5977, + "step": 234 + }, + { + "epoch": 1.44, + "learning_rate": 1.932472229404356e-05, + "loss": 3.5703, + "step": 235 + }, + { + "epoch": 1.45, + "learning_rate": 1.9317525684566686e-05, + "loss": 3.4336, + "step": 236 + }, + { + "epoch": 1.45, + "learning_rate": 1.931029228452449e-05, + "loss": 3.5508, + "step": 237 + }, + { + "epoch": 1.46, + "learning_rate": 1.9303022122478303e-05, + "loss": 3.7188, + "step": 238 + }, + { + "epoch": 1.47, + "learning_rate": 1.9295715227134595e-05, + "loss": 3.4766, + "step": 239 + }, + { + "epoch": 1.47, + "learning_rate": 1.9288371627344894e-05, + "loss": 3.6484, + "step": 240 + }, + { + "epoch": 1.48, + "learning_rate": 1.9280991352105656e-05, + "loss": 3.5703, + "step": 241 + }, + { + "epoch": 1.48, + "learning_rate": 1.9273574430558143e-05, + "loss": 3.4336, + "step": 242 + }, + { + "epoch": 1.49, + "learning_rate": 1.9266120891988326e-05, + "loss": 3.5469, + "step": 243 + }, + { + "epoch": 1.5, + "learning_rate": 1.925863076582674e-05, + "loss": 3.2812, + "step": 244 + }, + { + "epoch": 1.5, + "learning_rate": 1.9251104081648423e-05, + "loss": 3.4102, + "step": 245 + }, + { + "epoch": 1.51, + "learning_rate": 1.9243540869172724e-05, + "loss": 3.332, + "step": 246 + }, + { + "epoch": 1.52, + "learning_rate": 1.9235941158263253e-05, + "loss": 3.5039, + "step": 247 + }, + { + "epoch": 1.52, + "learning_rate": 1.922830497892772e-05, + "loss": 3.4883, + "step": 248 + }, + { + "epoch": 1.53, + "learning_rate": 1.9220632361317843e-05, + "loss": 3.5664, + "step": 249 + }, + { + "epoch": 1.53, + "learning_rate": 1.9212923335729206e-05, + "loss": 3.5195, + "step": 250 + }, + { + "epoch": 1.54, + "learning_rate": 1.920517793260116e-05, + "loss": 3.4531, + "step": 251 + }, + { + "epoch": 1.55, + "learning_rate": 1.9197396182516694e-05, + "loss": 3.7734, + "step": 252 + }, + { + "epoch": 1.55, + "learning_rate": 1.918957811620231e-05, + "loss": 3.6953, + "step": 253 + }, + { + "epoch": 1.56, + "learning_rate": 1.9181723764527902e-05, + "loss": 3.6133, + "step": 254 + }, + { + "epoch": 1.56, + "learning_rate": 1.917383315850665e-05, + "loss": 3.5391, + "step": 255 + }, + { + "epoch": 1.57, + "learning_rate": 1.9165906329294875e-05, + "loss": 3.5898, + "step": 256 + }, + { + "epoch": 1.58, + "learning_rate": 1.9157943308191934e-05, + "loss": 3.7188, + "step": 257 + }, + { + "epoch": 1.58, + "learning_rate": 1.914994412664008e-05, + "loss": 3.8125, + "step": 258 + }, + { + "epoch": 1.59, + "learning_rate": 1.9141908816224356e-05, + "loss": 3.875, + "step": 259 + }, + { + "epoch": 1.6, + "learning_rate": 1.9133837408672456e-05, + "loss": 3.4102, + "step": 260 + }, + { + "epoch": 1.6, + "learning_rate": 1.9125729935854606e-05, + "loss": 3.2344, + "step": 261 + }, + { + "epoch": 1.61, + "learning_rate": 1.9117586429783433e-05, + "loss": 3.7656, + "step": 262 + }, + { + "epoch": 1.61, + "learning_rate": 1.910940692261385e-05, + "loss": 3.6992, + "step": 263 + }, + { + "epoch": 1.62, + "learning_rate": 1.9101191446642917e-05, + "loss": 3.4766, + "step": 264 + }, + { + "epoch": 1.63, + "learning_rate": 1.909294003430972e-05, + "loss": 3.1211, + "step": 265 + }, + { + "epoch": 1.63, + "learning_rate": 1.9084652718195237e-05, + "loss": 3.4102, + "step": 266 + }, + { + "epoch": 1.64, + "learning_rate": 1.907632953102222e-05, + "loss": 3.6602, + "step": 267 + }, + { + "epoch": 1.64, + "learning_rate": 1.906797050565505e-05, + "loss": 3.6836, + "step": 268 + }, + { + "epoch": 1.65, + "learning_rate": 1.9059575675099622e-05, + "loss": 3.582, + "step": 269 + }, + { + "epoch": 1.66, + "learning_rate": 1.9051145072503216e-05, + "loss": 3.6172, + "step": 270 + }, + { + "epoch": 1.66, + "learning_rate": 1.9042678731154337e-05, + "loss": 3.457, + "step": 271 + }, + { + "epoch": 1.67, + "learning_rate": 1.9034176684482638e-05, + "loss": 3.3398, + "step": 272 + }, + { + "epoch": 1.67, + "learning_rate": 1.9025638966058722e-05, + "loss": 3.4883, + "step": 273 + }, + { + "epoch": 1.68, + "learning_rate": 1.901706560959407e-05, + "loss": 3.6602, + "step": 274 + }, + { + "epoch": 1.69, + "learning_rate": 1.900845664894086e-05, + "loss": 3.6797, + "step": 275 + }, + { + "epoch": 1.69, + "learning_rate": 1.8999812118091877e-05, + "loss": 3.4766, + "step": 276 + }, + { + "epoch": 1.7, + "learning_rate": 1.8991132051180332e-05, + "loss": 3.3945, + "step": 277 + }, + { + "epoch": 1.71, + "learning_rate": 1.898241648247977e-05, + "loss": 3.2461, + "step": 278 + }, + { + "epoch": 1.71, + "learning_rate": 1.8973665446403902e-05, + "loss": 3.4023, + "step": 279 + }, + { + "epoch": 1.72, + "learning_rate": 1.8964878977506496e-05, + "loss": 3.4492, + "step": 280 + }, + { + "epoch": 1.72, + "learning_rate": 1.895605711048122e-05, + "loss": 3.5, + "step": 281 + }, + { + "epoch": 1.73, + "learning_rate": 1.8947199880161515e-05, + "loss": 3.4531, + "step": 282 + }, + { + "epoch": 1.74, + "learning_rate": 1.8938307321520453e-05, + "loss": 3.6523, + "step": 283 + }, + { + "epoch": 1.74, + "learning_rate": 1.89293794696706e-05, + "loss": 3.6445, + "step": 284 + }, + { + "epoch": 1.75, + "learning_rate": 1.8920416359863885e-05, + "loss": 3.3711, + "step": 285 + }, + { + "epoch": 1.75, + "learning_rate": 1.8911418027491453e-05, + "loss": 3.4414, + "step": 286 + }, + { + "epoch": 1.76, + "learning_rate": 1.8902384508083518e-05, + "loss": 3.2656, + "step": 287 + }, + { + "epoch": 1.77, + "learning_rate": 1.8893315837309235e-05, + "loss": 3.6289, + "step": 288 + }, + { + "epoch": 1.77, + "learning_rate": 1.8884212050976568e-05, + "loss": 3.4023, + "step": 289 + }, + { + "epoch": 1.78, + "learning_rate": 1.8875073185032116e-05, + "loss": 3.6914, + "step": 290 + }, + { + "epoch": 1.79, + "learning_rate": 1.8865899275561003e-05, + "loss": 3.3281, + "step": 291 + }, + { + "epoch": 1.79, + "learning_rate": 1.885669035878672e-05, + "loss": 3.7227, + "step": 292 + }, + { + "epoch": 1.8, + "learning_rate": 1.8847446471070985e-05, + "loss": 3.2891, + "step": 293 + }, + { + "epoch": 1.8, + "learning_rate": 1.8838167648913606e-05, + "loss": 3.4844, + "step": 294 + }, + { + "epoch": 1.81, + "learning_rate": 1.882885392895232e-05, + "loss": 3.7617, + "step": 295 + }, + { + "epoch": 1.82, + "learning_rate": 1.881950534796267e-05, + "loss": 3.3945, + "step": 296 + }, + { + "epoch": 1.82, + "learning_rate": 1.8810121942857848e-05, + "loss": 3.5547, + "step": 297 + }, + { + "epoch": 1.83, + "learning_rate": 1.8800703750688536e-05, + "loss": 3.6484, + "step": 298 + }, + { + "epoch": 1.83, + "learning_rate": 1.8791250808642792e-05, + "loss": 3.668, + "step": 299 + }, + { + "epoch": 1.84, + "learning_rate": 1.8781763154045873e-05, + "loss": 3.5664, + "step": 300 + }, + { + "epoch": 1.85, + "learning_rate": 1.877224082436011e-05, + "loss": 3.2695, + "step": 301 + }, + { + "epoch": 1.85, + "learning_rate": 1.8762683857184738e-05, + "loss": 3.5781, + "step": 302 + }, + { + "epoch": 1.86, + "learning_rate": 1.8753092290255765e-05, + "loss": 3.8359, + "step": 303 + }, + { + "epoch": 1.87, + "learning_rate": 1.8743466161445823e-05, + "loss": 3.3242, + "step": 304 + }, + { + "epoch": 1.87, + "learning_rate": 1.8733805508764e-05, + "loss": 3.3086, + "step": 305 + }, + { + "epoch": 1.88, + "learning_rate": 1.872411037035572e-05, + "loss": 3.4531, + "step": 306 + }, + { + "epoch": 1.88, + "learning_rate": 1.8714380784502553e-05, + "loss": 3.5586, + "step": 307 + }, + { + "epoch": 1.89, + "learning_rate": 1.870461678962211e-05, + "loss": 3.6797, + "step": 308 + }, + { + "epoch": 1.9, + "learning_rate": 1.869481842426784e-05, + "loss": 3.4609, + "step": 309 + }, + { + "epoch": 1.9, + "learning_rate": 1.8684985727128936e-05, + "loss": 3.6289, + "step": 310 + }, + { + "epoch": 1.91, + "learning_rate": 1.8675118737030123e-05, + "loss": 3.4844, + "step": 311 + }, + { + "epoch": 1.91, + "learning_rate": 1.866521749293155e-05, + "loss": 3.7461, + "step": 312 + }, + { + "epoch": 1.92, + "learning_rate": 1.8655282033928618e-05, + "loss": 3.2852, + "step": 313 + }, + { + "epoch": 1.93, + "learning_rate": 1.8645312399251818e-05, + "loss": 3.6875, + "step": 314 + }, + { + "epoch": 1.93, + "learning_rate": 1.8635308628266586e-05, + "loss": 3.2266, + "step": 315 + }, + { + "epoch": 1.94, + "learning_rate": 1.8625270760473164e-05, + "loss": 3.5977, + "step": 316 + }, + { + "epoch": 1.94, + "learning_rate": 1.8615198835506393e-05, + "loss": 3.6133, + "step": 317 + }, + { + "epoch": 1.95, + "learning_rate": 1.8605092893135626e-05, + "loss": 3.6172, + "step": 318 + }, + { + "epoch": 1.96, + "learning_rate": 1.8594952973264512e-05, + "loss": 3.4766, + "step": 319 + }, + { + "epoch": 1.96, + "learning_rate": 1.8584779115930866e-05, + "loss": 3.4766, + "step": 320 + }, + { + "epoch": 1.97, + "learning_rate": 1.857457136130651e-05, + "loss": 3.6875, + "step": 321 + }, + { + "epoch": 1.98, + "learning_rate": 1.856432974969711e-05, + "loss": 3.3359, + "step": 322 + }, + { + "epoch": 1.98, + "learning_rate": 1.855405432154201e-05, + "loss": 3.5, + "step": 323 + }, + { + "epoch": 1.99, + "learning_rate": 1.8543745117414094e-05, + "loss": 3.5547, + "step": 324 + }, + { + "epoch": 1.99, + "learning_rate": 1.8533402178019596e-05, + "loss": 3.1367, + "step": 325 + }, + { + "epoch": 2.0, + "learning_rate": 1.8523025544197964e-05, + "loss": 3.4141, + "step": 326 + }, + { + "epoch": 2.01, + "learning_rate": 1.8512615256921692e-05, + "loss": 3.0078, + "step": 327 + }, + { + "epoch": 2.01, + "learning_rate": 1.8502171357296144e-05, + "loss": 3.0586, + "step": 328 + }, + { + "epoch": 2.02, + "learning_rate": 1.8491693886559413e-05, + "loss": 3.1953, + "step": 329 + }, + { + "epoch": 2.02, + "learning_rate": 1.848118288608215e-05, + "loss": 3.0625, + "step": 330 + }, + { + "epoch": 2.03, + "learning_rate": 1.8470638397367397e-05, + "loss": 3.25, + "step": 331 + }, + { + "epoch": 2.04, + "learning_rate": 1.846006046205042e-05, + "loss": 3.2422, + "step": 332 + }, + { + "epoch": 2.04, + "learning_rate": 1.8449449121898552e-05, + "loss": 2.9258, + "step": 333 + }, + { + "epoch": 2.05, + "learning_rate": 1.8438804418811038e-05, + "loss": 2.9883, + "step": 334 + }, + { + "epoch": 2.06, + "learning_rate": 1.842812639481884e-05, + "loss": 3.3203, + "step": 335 + }, + { + "epoch": 2.06, + "learning_rate": 1.84174150920845e-05, + "loss": 3.0195, + "step": 336 + }, + { + "epoch": 2.07, + "learning_rate": 1.8406670552901958e-05, + "loss": 2.9375, + "step": 337 + }, + { + "epoch": 2.07, + "learning_rate": 1.839589281969639e-05, + "loss": 3.2578, + "step": 338 + }, + { + "epoch": 2.08, + "learning_rate": 1.8385081935024044e-05, + "loss": 3.0469, + "step": 339 + }, + { + "epoch": 2.09, + "learning_rate": 1.837423794157206e-05, + "loss": 3.1367, + "step": 340 + }, + { + "epoch": 2.09, + "learning_rate": 1.836336088215831e-05, + "loss": 3.0234, + "step": 341 + }, + { + "epoch": 2.1, + "learning_rate": 1.835245079973124e-05, + "loss": 2.8242, + "step": 342 + }, + { + "epoch": 2.1, + "learning_rate": 1.834150773736967e-05, + "loss": 2.9414, + "step": 343 + }, + { + "epoch": 2.11, + "learning_rate": 1.8330531738282656e-05, + "loss": 3.0742, + "step": 344 + }, + { + "epoch": 2.12, + "learning_rate": 1.8319522845809306e-05, + "loss": 3.0625, + "step": 345 + }, + { + "epoch": 2.12, + "learning_rate": 1.8308481103418597e-05, + "loss": 2.8828, + "step": 346 + }, + { + "epoch": 2.13, + "learning_rate": 1.8297406554709228e-05, + "loss": 3.1836, + "step": 347 + }, + { + "epoch": 2.13, + "learning_rate": 1.8286299243409424e-05, + "loss": 2.8086, + "step": 348 + }, + { + "epoch": 2.14, + "learning_rate": 1.8275159213376783e-05, + "loss": 2.9258, + "step": 349 + }, + { + "epoch": 2.15, + "learning_rate": 1.826398650859809e-05, + "loss": 3.0977, + "step": 350 + }, + { + "epoch": 2.15, + "learning_rate": 1.8252781173189148e-05, + "loss": 3.3086, + "step": 351 + }, + { + "epoch": 2.16, + "learning_rate": 1.82415432513946e-05, + "loss": 3.0117, + "step": 352 + }, + { + "epoch": 2.17, + "learning_rate": 1.823027278758776e-05, + "loss": 2.957, + "step": 353 + }, + { + "epoch": 2.17, + "learning_rate": 1.821896982627044e-05, + "loss": 3.2617, + "step": 354 + }, + { + "epoch": 2.18, + "learning_rate": 1.8207634412072765e-05, + "loss": 3.1172, + "step": 355 + }, + { + "epoch": 2.18, + "learning_rate": 1.8196266589753e-05, + "loss": 2.8867, + "step": 356 + }, + { + "epoch": 2.19, + "learning_rate": 1.818486640419737e-05, + "loss": 3.2539, + "step": 357 + }, + { + "epoch": 2.2, + "learning_rate": 1.81734339004199e-05, + "loss": 2.8633, + "step": 358 + }, + { + "epoch": 2.2, + "learning_rate": 1.816196912356222e-05, + "loss": 3.1016, + "step": 359 + }, + { + "epoch": 2.21, + "learning_rate": 1.8150472118893382e-05, + "loss": 3.0898, + "step": 360 + }, + { + "epoch": 2.21, + "learning_rate": 1.8138942931809702e-05, + "loss": 2.9453, + "step": 361 + }, + { + "epoch": 2.22, + "learning_rate": 1.8127381607834563e-05, + "loss": 3.2383, + "step": 362 + }, + { + "epoch": 2.23, + "learning_rate": 1.8115788192618247e-05, + "loss": 3.0703, + "step": 363 + }, + { + "epoch": 2.23, + "learning_rate": 1.8104162731937746e-05, + "loss": 3.0977, + "step": 364 + }, + { + "epoch": 2.24, + "learning_rate": 1.8092505271696582e-05, + "loss": 3.2344, + "step": 365 + }, + { + "epoch": 2.25, + "learning_rate": 1.808081585792463e-05, + "loss": 2.7617, + "step": 366 + }, + { + "epoch": 2.25, + "learning_rate": 1.8069094536777938e-05, + "loss": 3.0898, + "step": 367 + }, + { + "epoch": 2.26, + "learning_rate": 1.805734135453854e-05, + "loss": 3.0781, + "step": 368 + }, + { + "epoch": 2.26, + "learning_rate": 1.8045556357614273e-05, + "loss": 3.4922, + "step": 369 + }, + { + "epoch": 2.27, + "learning_rate": 1.8033739592538598e-05, + "loss": 3.1211, + "step": 370 + }, + { + "epoch": 2.28, + "learning_rate": 1.8021891105970405e-05, + "loss": 2.9453, + "step": 371 + }, + { + "epoch": 2.28, + "learning_rate": 1.8010010944693846e-05, + "loss": 3.1016, + "step": 372 + }, + { + "epoch": 2.29, + "learning_rate": 1.7998099155618147e-05, + "loss": 3.0117, + "step": 373 + }, + { + "epoch": 2.29, + "learning_rate": 1.7986155785777402e-05, + "loss": 3.1523, + "step": 374 + }, + { + "epoch": 2.3, + "learning_rate": 1.7974180882330413e-05, + "loss": 3.0352, + "step": 375 + }, + { + "epoch": 2.31, + "learning_rate": 1.7962174492560492e-05, + "loss": 2.8711, + "step": 376 + }, + { + "epoch": 2.31, + "learning_rate": 1.7950136663875274e-05, + "loss": 3.1953, + "step": 377 + }, + { + "epoch": 2.32, + "learning_rate": 1.7938067443806538e-05, + "loss": 3.2188, + "step": 378 + }, + { + "epoch": 2.33, + "learning_rate": 1.7925966880009998e-05, + "loss": 2.8203, + "step": 379 + }, + { + "epoch": 2.33, + "learning_rate": 1.791383502026515e-05, + "loss": 3.1172, + "step": 380 + }, + { + "epoch": 2.34, + "learning_rate": 1.790167191247504e-05, + "loss": 2.9414, + "step": 381 + }, + { + "epoch": 2.34, + "learning_rate": 1.7889477604666124e-05, + "loss": 2.8398, + "step": 382 + }, + { + "epoch": 2.35, + "learning_rate": 1.787725214498803e-05, + "loss": 3.1836, + "step": 383 + }, + { + "epoch": 2.36, + "learning_rate": 1.78649955817134e-05, + "loss": 3.0625, + "step": 384 + }, + { + "epoch": 2.36, + "learning_rate": 1.785270796323769e-05, + "loss": 2.8945, + "step": 385 + }, + { + "epoch": 2.37, + "learning_rate": 1.784038933807898e-05, + "loss": 2.9688, + "step": 386 + }, + { + "epoch": 2.37, + "learning_rate": 1.7828039754877778e-05, + "loss": 3.0352, + "step": 387 + }, + { + "epoch": 2.38, + "learning_rate": 1.7815659262396825e-05, + "loss": 3.0977, + "step": 388 + }, + { + "epoch": 2.39, + "learning_rate": 1.780324790952092e-05, + "loss": 3.1445, + "step": 389 + }, + { + "epoch": 2.39, + "learning_rate": 1.7790805745256703e-05, + "loss": 2.9766, + "step": 390 + }, + { + "epoch": 2.4, + "learning_rate": 1.7778332818732492e-05, + "loss": 3.0547, + "step": 391 + }, + { + "epoch": 2.4, + "learning_rate": 1.7765829179198048e-05, + "loss": 3.1758, + "step": 392 + }, + { + "epoch": 2.41, + "learning_rate": 1.7753294876024417e-05, + "loss": 3.0625, + "step": 393 + }, + { + "epoch": 2.42, + "learning_rate": 1.7740729958703725e-05, + "loss": 2.9297, + "step": 394 + }, + { + "epoch": 2.42, + "learning_rate": 1.7728134476848965e-05, + "loss": 3.0586, + "step": 395 + }, + { + "epoch": 2.43, + "learning_rate": 1.7715508480193832e-05, + "loss": 3.0039, + "step": 396 + }, + { + "epoch": 2.44, + "learning_rate": 1.7702852018592493e-05, + "loss": 2.8086, + "step": 397 + }, + { + "epoch": 2.44, + "learning_rate": 1.769016514201942e-05, + "loss": 2.9336, + "step": 398 + }, + { + "epoch": 2.45, + "learning_rate": 1.7677447900569166e-05, + "loss": 3.4219, + "step": 399 + }, + { + "epoch": 2.45, + "learning_rate": 1.7664700344456198e-05, + "loss": 3.0625, + "step": 400 + }, + { + "epoch": 2.46, + "learning_rate": 1.765192252401467e-05, + "loss": 3.2617, + "step": 401 + }, + { + "epoch": 2.47, + "learning_rate": 1.7639114489698238e-05, + "loss": 3.0977, + "step": 402 + }, + { + "epoch": 2.47, + "learning_rate": 1.762627629207986e-05, + "loss": 3.0703, + "step": 403 + }, + { + "epoch": 2.48, + "learning_rate": 1.7613407981851586e-05, + "loss": 3.0938, + "step": 404 + }, + { + "epoch": 2.48, + "learning_rate": 1.760050960982439e-05, + "loss": 3.3047, + "step": 405 + }, + { + "epoch": 2.49, + "learning_rate": 1.758758122692791e-05, + "loss": 2.8867, + "step": 406 + }, + { + "epoch": 2.5, + "learning_rate": 1.757462288421032e-05, + "loss": 3.2148, + "step": 407 + }, + { + "epoch": 2.5, + "learning_rate": 1.7561634632838062e-05, + "loss": 3.1172, + "step": 408 + }, + { + "epoch": 2.51, + "learning_rate": 1.7548616524095697e-05, + "loss": 2.9141, + "step": 409 + }, + { + "epoch": 2.52, + "learning_rate": 1.753556860938566e-05, + "loss": 3.0938, + "step": 410 + }, + { + "epoch": 2.52, + "learning_rate": 1.7522490940228086e-05, + "loss": 2.8672, + "step": 411 + }, + { + "epoch": 2.53, + "learning_rate": 1.7509383568260597e-05, + "loss": 3.1641, + "step": 412 + }, + { + "epoch": 2.53, + "learning_rate": 1.749624654523809e-05, + "loss": 2.9883, + "step": 413 + }, + { + "epoch": 2.54, + "learning_rate": 1.7483079923032543e-05, + "loss": 3.0898, + "step": 414 + }, + { + "epoch": 2.55, + "learning_rate": 1.7469883753632817e-05, + "loss": 3.0391, + "step": 415 + }, + { + "epoch": 2.55, + "learning_rate": 1.745665808914443e-05, + "loss": 3.1055, + "step": 416 + }, + { + "epoch": 2.56, + "learning_rate": 1.744340298178936e-05, + "loss": 3.0664, + "step": 417 + }, + { + "epoch": 2.56, + "learning_rate": 1.743011848390585e-05, + "loss": 2.8672, + "step": 418 + }, + { + "epoch": 2.57, + "learning_rate": 1.7416804647948194e-05, + "loss": 3.2891, + "step": 419 + }, + { + "epoch": 2.58, + "learning_rate": 1.740346152648652e-05, + "loss": 2.9805, + "step": 420 + }, + { + "epoch": 2.58, + "learning_rate": 1.7390089172206594e-05, + "loss": 2.7305, + "step": 421 + }, + { + "epoch": 2.59, + "learning_rate": 1.7376687637909607e-05, + "loss": 3.0547, + "step": 422 + }, + { + "epoch": 2.6, + "learning_rate": 1.7363256976511972e-05, + "loss": 2.7773, + "step": 423 + }, + { + "epoch": 2.6, + "learning_rate": 1.7349797241045115e-05, + "loss": 3.2188, + "step": 424 + }, + { + "epoch": 2.61, + "learning_rate": 1.733630848465525e-05, + "loss": 3.0156, + "step": 425 + }, + { + "epoch": 2.61, + "learning_rate": 1.732279076060319e-05, + "loss": 3.1328, + "step": 426 + }, + { + "epoch": 2.62, + "learning_rate": 1.730924412226413e-05, + "loss": 3.0664, + "step": 427 + }, + { + "epoch": 2.63, + "learning_rate": 1.729566862312742e-05, + "loss": 2.9102, + "step": 428 + }, + { + "epoch": 2.63, + "learning_rate": 1.7282064316796387e-05, + "loss": 3.0508, + "step": 429 + }, + { + "epoch": 2.64, + "learning_rate": 1.726843125698809e-05, + "loss": 2.8711, + "step": 430 + }, + { + "epoch": 2.64, + "learning_rate": 1.7254769497533128e-05, + "loss": 2.75, + "step": 431 + }, + { + "epoch": 2.65, + "learning_rate": 1.724107909237542e-05, + "loss": 2.8438, + "step": 432 + }, + { + "epoch": 2.66, + "learning_rate": 1.7227360095571992e-05, + "loss": 2.9883, + "step": 433 + }, + { + "epoch": 2.66, + "learning_rate": 1.721361256129277e-05, + "loss": 3.2461, + "step": 434 + }, + { + "epoch": 2.67, + "learning_rate": 1.719983654382036e-05, + "loss": 3.0781, + "step": 435 + }, + { + "epoch": 2.67, + "learning_rate": 1.7186032097549822e-05, + "loss": 3.1523, + "step": 436 + }, + { + "epoch": 2.68, + "learning_rate": 1.717219927698849e-05, + "loss": 2.832, + "step": 437 + }, + { + "epoch": 2.69, + "learning_rate": 1.7158338136755724e-05, + "loss": 3.2617, + "step": 438 + }, + { + "epoch": 2.69, + "learning_rate": 1.7144448731582698e-05, + "loss": 3.0781, + "step": 439 + }, + { + "epoch": 2.7, + "learning_rate": 1.7130531116312202e-05, + "loss": 3.1641, + "step": 440 + }, + { + "epoch": 2.71, + "learning_rate": 1.7116585345898413e-05, + "loss": 3.1484, + "step": 441 + }, + { + "epoch": 2.71, + "learning_rate": 1.7102611475406676e-05, + "loss": 3.2656, + "step": 442 + }, + { + "epoch": 2.72, + "learning_rate": 1.7088609560013284e-05, + "loss": 3.0938, + "step": 443 + }, + { + "epoch": 2.72, + "learning_rate": 1.7074579655005282e-05, + "loss": 2.9648, + "step": 444 + }, + { + "epoch": 2.73, + "learning_rate": 1.7060521815780225e-05, + "loss": 3.1328, + "step": 445 + }, + { + "epoch": 2.74, + "learning_rate": 1.704643609784596e-05, + "loss": 3.1211, + "step": 446 + }, + { + "epoch": 2.74, + "learning_rate": 1.7032322556820428e-05, + "loss": 3.1719, + "step": 447 + }, + { + "epoch": 2.75, + "learning_rate": 1.7018181248431416e-05, + "loss": 2.9883, + "step": 448 + }, + { + "epoch": 2.75, + "learning_rate": 1.700401222851636e-05, + "loss": 3.1172, + "step": 449 + }, + { + "epoch": 2.76, + "learning_rate": 1.698981555302212e-05, + "loss": 2.9531, + "step": 450 + }, + { + "epoch": 2.77, + "learning_rate": 1.6975591278004747e-05, + "loss": 2.9375, + "step": 451 + }, + { + "epoch": 2.77, + "learning_rate": 1.696133945962927e-05, + "loss": 3.1875, + "step": 452 + }, + { + "epoch": 2.78, + "learning_rate": 1.6947060154169473e-05, + "loss": 3.0742, + "step": 453 + }, + { + "epoch": 2.79, + "learning_rate": 1.6932753418007683e-05, + "loss": 3.0977, + "step": 454 + }, + { + "epoch": 2.79, + "learning_rate": 1.691841930763453e-05, + "loss": 2.9531, + "step": 455 + }, + { + "epoch": 2.8, + "learning_rate": 1.690405787964873e-05, + "loss": 2.9609, + "step": 456 + }, + { + "epoch": 2.8, + "learning_rate": 1.688966919075687e-05, + "loss": 2.7578, + "step": 457 + }, + { + "epoch": 2.81, + "learning_rate": 1.687525329777317e-05, + "loss": 2.9961, + "step": 458 + }, + { + "epoch": 2.82, + "learning_rate": 1.686081025761928e-05, + "loss": 3.3203, + "step": 459 + }, + { + "epoch": 2.82, + "learning_rate": 1.684634012732403e-05, + "loss": 2.9258, + "step": 460 + }, + { + "epoch": 2.83, + "learning_rate": 1.6831842964023212e-05, + "loss": 3.1445, + "step": 461 + }, + { + "epoch": 2.83, + "learning_rate": 1.6817318824959375e-05, + "loss": 3.2617, + "step": 462 + }, + { + "epoch": 2.84, + "learning_rate": 1.680276776748157e-05, + "loss": 2.9883, + "step": 463 + }, + { + "epoch": 2.85, + "learning_rate": 1.6788189849045135e-05, + "loss": 2.9219, + "step": 464 + }, + { + "epoch": 2.85, + "learning_rate": 1.6773585127211478e-05, + "loss": 2.8281, + "step": 465 + }, + { + "epoch": 2.86, + "learning_rate": 1.6758953659647838e-05, + "loss": 3.0312, + "step": 466 + }, + { + "epoch": 2.87, + "learning_rate": 1.6744295504127055e-05, + "loss": 3.2461, + "step": 467 + }, + { + "epoch": 2.87, + "learning_rate": 1.6729610718527357e-05, + "loss": 3.1562, + "step": 468 + }, + { + "epoch": 2.88, + "learning_rate": 1.6714899360832118e-05, + "loss": 2.9023, + "step": 469 + }, + { + "epoch": 2.88, + "learning_rate": 1.6700161489129624e-05, + "loss": 3.0898, + "step": 470 + }, + { + "epoch": 2.89, + "learning_rate": 1.668539716161287e-05, + "loss": 2.9414, + "step": 471 + }, + { + "epoch": 2.9, + "learning_rate": 1.667060643657929e-05, + "loss": 2.9844, + "step": 472 + }, + { + "epoch": 2.9, + "learning_rate": 1.6655789372430572e-05, + "loss": 3.0859, + "step": 473 + }, + { + "epoch": 2.91, + "learning_rate": 1.6640946027672395e-05, + "loss": 3.1758, + "step": 474 + }, + { + "epoch": 2.91, + "learning_rate": 1.66260764609142e-05, + "loss": 3.1719, + "step": 475 + }, + { + "epoch": 2.92, + "learning_rate": 1.6611180730868975e-05, + "loss": 3.0508, + "step": 476 + }, + { + "epoch": 2.93, + "learning_rate": 1.6596258896353027e-05, + "loss": 3.1406, + "step": 477 + }, + { + "epoch": 2.93, + "learning_rate": 1.658131101628571e-05, + "loss": 3.1836, + "step": 478 + }, + { + "epoch": 2.94, + "learning_rate": 1.656633714968924e-05, + "loss": 3.0352, + "step": 479 + }, + { + "epoch": 2.94, + "learning_rate": 1.6551337355688437e-05, + "loss": 2.8789, + "step": 480 + }, + { + "epoch": 2.95, + "learning_rate": 1.653631169351049e-05, + "loss": 3.1094, + "step": 481 + }, + { + "epoch": 2.96, + "learning_rate": 1.6521260222484738e-05, + "loss": 3.4102, + "step": 482 + }, + { + "epoch": 2.96, + "learning_rate": 1.650618300204242e-05, + "loss": 3.293, + "step": 483 + }, + { + "epoch": 2.97, + "learning_rate": 1.6491080091716457e-05, + "loss": 2.9922, + "step": 484 + }, + { + "epoch": 2.98, + "learning_rate": 1.64759515511412e-05, + "loss": 3.082, + "step": 485 + }, + { + "epoch": 2.98, + "learning_rate": 1.6460797440052195e-05, + "loss": 2.9297, + "step": 486 + }, + { + "epoch": 2.99, + "learning_rate": 1.6445617818285974e-05, + "loss": 2.8906, + "step": 487 + }, + { + "epoch": 2.99, + "learning_rate": 1.643041274577978e-05, + "loss": 3.0625, + "step": 488 + }, + { + "epoch": 3.0, + "learning_rate": 1.6415182282571356e-05, + "loss": 3.1562, + "step": 489 + }, + { + "epoch": 3.01, + "learning_rate": 1.6399926488798702e-05, + "loss": 2.6367, + "step": 490 + }, + { + "epoch": 3.01, + "learning_rate": 1.6384645424699835e-05, + "loss": 2.207, + "step": 491 + }, + { + "epoch": 3.02, + "learning_rate": 1.6369339150612557e-05, + "loss": 2.4844, + "step": 492 + }, + { + "epoch": 3.02, + "learning_rate": 1.6354007726974205e-05, + "loss": 2.4219, + "step": 493 + }, + { + "epoch": 3.03, + "learning_rate": 1.6338651214321426e-05, + "loss": 2.4531, + "step": 494 + }, + { + "epoch": 3.04, + "learning_rate": 1.632326967328993e-05, + "loss": 2.4961, + "step": 495 + }, + { + "epoch": 3.04, + "learning_rate": 1.630786316461425e-05, + "loss": 2.4219, + "step": 496 + }, + { + "epoch": 3.05, + "learning_rate": 1.6292431749127507e-05, + "loss": 2.5273, + "step": 497 + }, + { + "epoch": 3.06, + "learning_rate": 1.627697548776117e-05, + "loss": 2.4492, + "step": 498 + }, + { + "epoch": 3.06, + "learning_rate": 1.6261494441544805e-05, + "loss": 2.4922, + "step": 499 + }, + { + "epoch": 3.07, + "learning_rate": 1.624598867160585e-05, + "loss": 2.375, + "step": 500 + }, + { + "epoch": 3.07, + "learning_rate": 1.623045823916936e-05, + "loss": 2.6914, + "step": 501 + }, + { + "epoch": 3.08, + "learning_rate": 1.6214903205557774e-05, + "loss": 2.4141, + "step": 502 + }, + { + "epoch": 3.09, + "learning_rate": 1.619932363219067e-05, + "loss": 2.5742, + "step": 503 + }, + { + "epoch": 3.09, + "learning_rate": 1.6183719580584515e-05, + "loss": 2.332, + "step": 504 + }, + { + "epoch": 3.1, + "learning_rate": 1.6168091112352443e-05, + "loss": 2.4727, + "step": 505 + }, + { + "epoch": 3.1, + "learning_rate": 1.6152438289203982e-05, + "loss": 2.5352, + "step": 506 + }, + { + "epoch": 3.11, + "learning_rate": 1.6136761172944837e-05, + "loss": 2.4375, + "step": 507 + }, + { + "epoch": 3.12, + "learning_rate": 1.612105982547663e-05, + "loss": 2.543, + "step": 508 + }, + { + "epoch": 3.12, + "learning_rate": 1.6105334308796665e-05, + "loss": 2.3945, + "step": 509 + }, + { + "epoch": 3.13, + "learning_rate": 1.6089584684997674e-05, + "loss": 2.4531, + "step": 510 + }, + { + "epoch": 3.13, + "learning_rate": 1.607381101626758e-05, + "loss": 2.5781, + "step": 511 + }, + { + "epoch": 3.14, + "learning_rate": 1.6058013364889247e-05, + "loss": 2.2852, + "step": 512 + }, + { + "epoch": 3.15, + "learning_rate": 1.6042191793240242e-05, + "loss": 2.293, + "step": 513 + }, + { + "epoch": 3.15, + "learning_rate": 1.6026346363792565e-05, + "loss": 2.5156, + "step": 514 + }, + { + "epoch": 3.16, + "learning_rate": 1.6010477139112438e-05, + "loss": 2.3711, + "step": 515 + }, + { + "epoch": 3.17, + "learning_rate": 1.5994584181860028e-05, + "loss": 2.2891, + "step": 516 + }, + { + "epoch": 3.17, + "learning_rate": 1.5978667554789216e-05, + "loss": 2.3867, + "step": 517 + }, + { + "epoch": 3.18, + "learning_rate": 1.596272732074734e-05, + "loss": 2.457, + "step": 518 + }, + { + "epoch": 3.18, + "learning_rate": 1.5946763542674958e-05, + "loss": 2.293, + "step": 519 + }, + { + "epoch": 3.19, + "learning_rate": 1.5930776283605585e-05, + "loss": 2.4492, + "step": 520 + }, + { + "epoch": 3.2, + "learning_rate": 1.5914765606665454e-05, + "loss": 2.2383, + "step": 521 + }, + { + "epoch": 3.2, + "learning_rate": 1.5898731575073262e-05, + "loss": 2.3281, + "step": 522 + }, + { + "epoch": 3.21, + "learning_rate": 1.5882674252139928e-05, + "loss": 2.4688, + "step": 523 + }, + { + "epoch": 3.21, + "learning_rate": 1.5866593701268334e-05, + "loss": 2.3125, + "step": 524 + }, + { + "epoch": 3.22, + "learning_rate": 1.5850489985953076e-05, + "loss": 2.3672, + "step": 525 + }, + { + "epoch": 3.23, + "learning_rate": 1.5834363169780227e-05, + "loss": 2.4688, + "step": 526 + }, + { + "epoch": 3.23, + "learning_rate": 1.5818213316427056e-05, + "loss": 2.375, + "step": 527 + }, + { + "epoch": 3.24, + "learning_rate": 1.5802040489661817e-05, + "loss": 2.418, + "step": 528 + }, + { + "epoch": 3.25, + "learning_rate": 1.578584475334345e-05, + "loss": 2.3867, + "step": 529 + }, + { + "epoch": 3.25, + "learning_rate": 1.5769626171421376e-05, + "loss": 2.2852, + "step": 530 + }, + { + "epoch": 3.26, + "learning_rate": 1.5753384807935214e-05, + "loss": 2.5234, + "step": 531 + }, + { + "epoch": 3.26, + "learning_rate": 1.5737120727014535e-05, + "loss": 2.3828, + "step": 532 + }, + { + "epoch": 3.27, + "learning_rate": 1.572083399287861e-05, + "loss": 2.4023, + "step": 533 + }, + { + "epoch": 3.28, + "learning_rate": 1.570452466983617e-05, + "loss": 2.4961, + "step": 534 + }, + { + "epoch": 3.28, + "learning_rate": 1.5688192822285116e-05, + "loss": 2.5234, + "step": 535 + }, + { + "epoch": 3.29, + "learning_rate": 1.567183851471231e-05, + "loss": 2.418, + "step": 536 + }, + { + "epoch": 3.29, + "learning_rate": 1.565546181169328e-05, + "loss": 2.3555, + "step": 537 + }, + { + "epoch": 3.3, + "learning_rate": 1.5639062777892e-05, + "loss": 2.4883, + "step": 538 + }, + { + "epoch": 3.31, + "learning_rate": 1.5622641478060602e-05, + "loss": 2.5586, + "step": 539 + }, + { + "epoch": 3.31, + "learning_rate": 1.5606197977039154e-05, + "loss": 2.3359, + "step": 540 + }, + { + "epoch": 3.32, + "learning_rate": 1.5589732339755362e-05, + "loss": 2.3398, + "step": 541 + }, + { + "epoch": 3.33, + "learning_rate": 1.5573244631224364e-05, + "loss": 2.2969, + "step": 542 + }, + { + "epoch": 3.33, + "learning_rate": 1.5556734916548432e-05, + "loss": 2.375, + "step": 543 + }, + { + "epoch": 3.34, + "learning_rate": 1.5540203260916728e-05, + "loss": 2.3398, + "step": 544 + }, + { + "epoch": 3.34, + "learning_rate": 1.552364972960506e-05, + "loss": 2.3516, + "step": 545 + }, + { + "epoch": 3.35, + "learning_rate": 1.5507074387975603e-05, + "loss": 2.4805, + "step": 546 + }, + { + "epoch": 3.36, + "learning_rate": 1.5490477301476648e-05, + "loss": 2.4766, + "step": 547 + }, + { + "epoch": 3.36, + "learning_rate": 1.5473858535642365e-05, + "loss": 2.4062, + "step": 548 + }, + { + "epoch": 3.37, + "learning_rate": 1.5457218156092503e-05, + "loss": 2.4727, + "step": 549 + }, + { + "epoch": 3.37, + "learning_rate": 1.5440556228532168e-05, + "loss": 2.3672, + "step": 550 + }, + { + "epoch": 3.38, + "learning_rate": 1.5423872818751544e-05, + "loss": 2.5195, + "step": 551 + }, + { + "epoch": 3.39, + "learning_rate": 1.5407167992625636e-05, + "loss": 2.418, + "step": 552 + }, + { + "epoch": 3.39, + "learning_rate": 1.5390441816114022e-05, + "loss": 2.3828, + "step": 553 + }, + { + "epoch": 3.4, + "learning_rate": 1.5373694355260565e-05, + "loss": 2.4336, + "step": 554 + }, + { + "epoch": 3.4, + "learning_rate": 1.5356925676193192e-05, + "loss": 2.3086, + "step": 555 + }, + { + "epoch": 3.41, + "learning_rate": 1.534013584512359e-05, + "loss": 2.25, + "step": 556 + }, + { + "epoch": 3.42, + "learning_rate": 1.5323324928346984e-05, + "loss": 2.3242, + "step": 557 + }, + { + "epoch": 3.42, + "learning_rate": 1.5306492992241836e-05, + "loss": 2.4023, + "step": 558 + }, + { + "epoch": 3.43, + "learning_rate": 1.5289640103269626e-05, + "loss": 2.4531, + "step": 559 + }, + { + "epoch": 3.44, + "learning_rate": 1.527276632797455e-05, + "loss": 2.3945, + "step": 560 + }, + { + "epoch": 3.44, + "learning_rate": 1.5255871732983284e-05, + "loss": 2.4258, + "step": 561 + }, + { + "epoch": 3.45, + "learning_rate": 1.5238956385004703e-05, + "loss": 2.4766, + "step": 562 + }, + { + "epoch": 3.45, + "learning_rate": 1.5222020350829636e-05, + "loss": 2.4141, + "step": 563 + }, + { + "epoch": 3.46, + "learning_rate": 1.5205063697330582e-05, + "loss": 2.3359, + "step": 564 + }, + { + "epoch": 3.47, + "learning_rate": 1.5188086491461467e-05, + "loss": 2.3047, + "step": 565 + }, + { + "epoch": 3.47, + "learning_rate": 1.5171088800257354e-05, + "loss": 2.5508, + "step": 566 + }, + { + "epoch": 3.48, + "learning_rate": 1.5154070690834211e-05, + "loss": 2.0957, + "step": 567 + }, + { + "epoch": 3.48, + "learning_rate": 1.5137032230388613e-05, + "loss": 2.4102, + "step": 568 + }, + { + "epoch": 3.49, + "learning_rate": 1.5119973486197497e-05, + "loss": 2.5352, + "step": 569 + }, + { + "epoch": 3.5, + "learning_rate": 1.5102894525617892e-05, + "loss": 2.25, + "step": 570 + }, + { + "epoch": 3.5, + "learning_rate": 1.5085795416086655e-05, + "loss": 2.3047, + "step": 571 + }, + { + "epoch": 3.51, + "learning_rate": 1.5068676225120196e-05, + "loss": 2.3359, + "step": 572 + }, + { + "epoch": 3.52, + "learning_rate": 1.5051537020314218e-05, + "loss": 2.5508, + "step": 573 + }, + { + "epoch": 3.52, + "learning_rate": 1.5034377869343453e-05, + "loss": 2.6211, + "step": 574 + }, + { + "epoch": 3.53, + "learning_rate": 1.5017198839961388e-05, + "loss": 2.5625, + "step": 575 + }, + { + "epoch": 3.53, + "learning_rate": 1.5000000000000002e-05, + "loss": 2.293, + "step": 576 + }, + { + "epoch": 3.54, + "learning_rate": 1.4982781417369496e-05, + "loss": 2.5078, + "step": 577 + }, + { + "epoch": 3.55, + "learning_rate": 1.4965543160058028e-05, + "loss": 2.3594, + "step": 578 + }, + { + "epoch": 3.55, + "learning_rate": 1.4948285296131435e-05, + "loss": 2.4531, + "step": 579 + }, + { + "epoch": 3.56, + "learning_rate": 1.4931007893732981e-05, + "loss": 2.4961, + "step": 580 + }, + { + "epoch": 3.56, + "learning_rate": 1.4913711021083071e-05, + "loss": 2.3672, + "step": 581 + }, + { + "epoch": 3.57, + "learning_rate": 1.4896394746478995e-05, + "loss": 2.5469, + "step": 582 + }, + { + "epoch": 3.58, + "learning_rate": 1.4879059138294647e-05, + "loss": 2.5703, + "step": 583 + }, + { + "epoch": 3.58, + "learning_rate": 1.4861704264980264e-05, + "loss": 2.5859, + "step": 584 + }, + { + "epoch": 3.59, + "learning_rate": 1.4844330195062145e-05, + "loss": 2.4648, + "step": 585 + }, + { + "epoch": 3.6, + "learning_rate": 1.4826936997142399e-05, + "loss": 2.4883, + "step": 586 + }, + { + "epoch": 3.6, + "learning_rate": 1.4809524739898651e-05, + "loss": 2.2656, + "step": 587 + }, + { + "epoch": 3.61, + "learning_rate": 1.4792093492083792e-05, + "loss": 2.2734, + "step": 588 + }, + { + "epoch": 3.61, + "learning_rate": 1.4774643322525691e-05, + "loss": 2.5156, + "step": 589 + }, + { + "epoch": 3.62, + "learning_rate": 1.4757174300126935e-05, + "loss": 2.6797, + "step": 590 + }, + { + "epoch": 3.63, + "learning_rate": 1.473968649386455e-05, + "loss": 2.3398, + "step": 591 + }, + { + "epoch": 3.63, + "learning_rate": 1.4722179972789725e-05, + "loss": 2.2539, + "step": 592 + }, + { + "epoch": 3.64, + "learning_rate": 1.4704654806027558e-05, + "loss": 2.5781, + "step": 593 + }, + { + "epoch": 3.64, + "learning_rate": 1.4687111062776758e-05, + "loss": 2.5352, + "step": 594 + }, + { + "epoch": 3.65, + "learning_rate": 1.466954881230939e-05, + "loss": 2.5195, + "step": 595 + }, + { + "epoch": 3.66, + "learning_rate": 1.4651968123970592e-05, + "loss": 2.3945, + "step": 596 + }, + { + "epoch": 3.66, + "learning_rate": 1.4634369067178312e-05, + "loss": 2.4922, + "step": 597 + }, + { + "epoch": 3.67, + "learning_rate": 1.4616751711423016e-05, + "loss": 2.4922, + "step": 598 + }, + { + "epoch": 3.67, + "learning_rate": 1.4599116126267431e-05, + "loss": 2.4961, + "step": 599 + }, + { + "epoch": 3.68, + "learning_rate": 1.4581462381346261e-05, + "loss": 2.4922, + "step": 600 + }, + { + "epoch": 3.69, + "learning_rate": 1.4563790546365914e-05, + "loss": 2.5, + "step": 601 + }, + { + "epoch": 3.69, + "learning_rate": 1.454610069110423e-05, + "loss": 2.4219, + "step": 602 + }, + { + "epoch": 3.7, + "learning_rate": 1.45283928854102e-05, + "loss": 2.418, + "step": 603 + }, + { + "epoch": 3.71, + "learning_rate": 1.4510667199203697e-05, + "loss": 2.5488, + "step": 604 + }, + { + "epoch": 3.71, + "learning_rate": 1.4492923702475183e-05, + "loss": 2.5312, + "step": 605 + }, + { + "epoch": 3.72, + "learning_rate": 1.4475162465285463e-05, + "loss": 2.5273, + "step": 606 + }, + { + "epoch": 3.72, + "learning_rate": 1.4457383557765385e-05, + "loss": 2.4141, + "step": 607 + }, + { + "epoch": 3.73, + "learning_rate": 1.443958705011556e-05, + "loss": 2.4453, + "step": 608 + }, + { + "epoch": 3.74, + "learning_rate": 1.4421773012606104e-05, + "loss": 2.293, + "step": 609 + }, + { + "epoch": 3.74, + "learning_rate": 1.4403941515576344e-05, + "loss": 2.4258, + "step": 610 + }, + { + "epoch": 3.75, + "learning_rate": 1.4386092629434551e-05, + "loss": 2.4648, + "step": 611 + }, + { + "epoch": 3.75, + "learning_rate": 1.4368226424657661e-05, + "loss": 2.3438, + "step": 612 + }, + { + "epoch": 3.76, + "learning_rate": 1.4350342971790979e-05, + "loss": 2.2168, + "step": 613 + }, + { + "epoch": 3.77, + "learning_rate": 1.4332442341447926e-05, + "loss": 2.3828, + "step": 614 + }, + { + "epoch": 3.77, + "learning_rate": 1.4314524604309748e-05, + "loss": 2.5117, + "step": 615 + }, + { + "epoch": 3.78, + "learning_rate": 1.4296589831125234e-05, + "loss": 2.4961, + "step": 616 + }, + { + "epoch": 3.79, + "learning_rate": 1.4278638092710446e-05, + "loss": 2.5391, + "step": 617 + }, + { + "epoch": 3.79, + "learning_rate": 1.4260669459948429e-05, + "loss": 2.3828, + "step": 618 + }, + { + "epoch": 3.8, + "learning_rate": 1.4242684003788934e-05, + "loss": 2.4102, + "step": 619 + }, + { + "epoch": 3.8, + "learning_rate": 1.4224681795248149e-05, + "loss": 2.457, + "step": 620 + }, + { + "epoch": 3.81, + "learning_rate": 1.42066629054084e-05, + "loss": 2.5, + "step": 621 + }, + { + "epoch": 3.82, + "learning_rate": 1.418862740541788e-05, + "loss": 2.4102, + "step": 622 + }, + { + "epoch": 3.82, + "learning_rate": 1.4170575366490376e-05, + "loss": 2.1758, + "step": 623 + }, + { + "epoch": 3.83, + "learning_rate": 1.415250685990497e-05, + "loss": 2.6445, + "step": 624 + }, + { + "epoch": 3.83, + "learning_rate": 1.4134421957005775e-05, + "loss": 2.043, + "step": 625 + }, + { + "epoch": 3.84, + "learning_rate": 1.4116320729201642e-05, + "loss": 2.457, + "step": 626 + }, + { + "epoch": 3.85, + "learning_rate": 1.4098203247965876e-05, + "loss": 2.1992, + "step": 627 + }, + { + "epoch": 3.85, + "learning_rate": 1.4080069584835971e-05, + "loss": 2.2891, + "step": 628 + }, + { + "epoch": 3.86, + "learning_rate": 1.4061919811413305e-05, + "loss": 2.2227, + "step": 629 + }, + { + "epoch": 3.87, + "learning_rate": 1.4043753999362872e-05, + "loss": 2.2305, + "step": 630 + }, + { + "epoch": 3.87, + "learning_rate": 1.4025572220412998e-05, + "loss": 2.625, + "step": 631 + }, + { + "epoch": 3.88, + "learning_rate": 1.400737454635505e-05, + "loss": 2.4219, + "step": 632 + }, + { + "epoch": 3.88, + "learning_rate": 1.398916104904316e-05, + "loss": 2.6133, + "step": 633 + }, + { + "epoch": 3.89, + "learning_rate": 1.3970931800393943e-05, + "loss": 2.5625, + "step": 634 + }, + { + "epoch": 3.9, + "learning_rate": 1.3952686872386195e-05, + "loss": 2.4531, + "step": 635 + }, + { + "epoch": 3.9, + "learning_rate": 1.3934426337060638e-05, + "loss": 2.6016, + "step": 636 + }, + { + "epoch": 3.91, + "learning_rate": 1.391615026651961e-05, + "loss": 2.3789, + "step": 637 + }, + { + "epoch": 3.91, + "learning_rate": 1.3897858732926794e-05, + "loss": 2.3281, + "step": 638 + }, + { + "epoch": 3.92, + "learning_rate": 1.3879551808506932e-05, + "loss": 2.2031, + "step": 639 + }, + { + "epoch": 3.93, + "learning_rate": 1.3861229565545532e-05, + "loss": 2.5352, + "step": 640 + }, + { + "epoch": 3.93, + "learning_rate": 1.384289207638859e-05, + "loss": 2.3008, + "step": 641 + }, + { + "epoch": 3.94, + "learning_rate": 1.3824539413442304e-05, + "loss": 2.5352, + "step": 642 + }, + { + "epoch": 3.94, + "learning_rate": 1.3806171649172782e-05, + "loss": 2.4922, + "step": 643 + }, + { + "epoch": 3.95, + "learning_rate": 1.3787788856105762e-05, + "loss": 2.3945, + "step": 644 + }, + { + "epoch": 3.96, + "learning_rate": 1.3769391106826326e-05, + "loss": 2.6016, + "step": 645 + }, + { + "epoch": 3.96, + "learning_rate": 1.3750978473978611e-05, + "loss": 2.4375, + "step": 646 + }, + { + "epoch": 3.97, + "learning_rate": 1.3732551030265514e-05, + "loss": 2.5195, + "step": 647 + }, + { + "epoch": 3.98, + "learning_rate": 1.371410884844843e-05, + "loss": 2.5391, + "step": 648 + }, + { + "epoch": 3.98, + "learning_rate": 1.3695652001346928e-05, + "loss": 2.4102, + "step": 649 + }, + { + "epoch": 3.99, + "learning_rate": 1.3677180561838501e-05, + "loss": 2.4727, + "step": 650 + }, + { + "epoch": 3.99, + "learning_rate": 1.3658694602858247e-05, + "loss": 2.6055, + "step": 651 + }, + { + "epoch": 4.0, + "learning_rate": 1.36401941973986e-05, + "loss": 2.2852, + "step": 652 + }, + { + "epoch": 4.01, + "learning_rate": 1.362167941850904e-05, + "loss": 1.9121, + "step": 653 + }, + { + "epoch": 4.01, + "learning_rate": 1.3603150339295797e-05, + "loss": 2.0977, + "step": 654 + }, + { + "epoch": 4.02, + "learning_rate": 1.3584607032921566e-05, + "loss": 1.9668, + "step": 655 + }, + { + "epoch": 4.02, + "learning_rate": 1.3566049572605222e-05, + "loss": 1.8398, + "step": 656 + }, + { + "epoch": 4.03, + "learning_rate": 1.3547478031621517e-05, + "loss": 1.7559, + "step": 657 + }, + { + "epoch": 4.04, + "learning_rate": 1.3528892483300821e-05, + "loss": 2.0586, + "step": 658 + }, + { + "epoch": 4.04, + "learning_rate": 1.3510293001028792e-05, + "loss": 1.8984, + "step": 659 + }, + { + "epoch": 4.05, + "learning_rate": 1.3491679658246114e-05, + "loss": 1.6895, + "step": 660 + }, + { + "epoch": 4.06, + "learning_rate": 1.3473052528448203e-05, + "loss": 1.7812, + "step": 661 + }, + { + "epoch": 4.06, + "learning_rate": 1.3454411685184913e-05, + "loss": 1.7539, + "step": 662 + }, + { + "epoch": 4.07, + "learning_rate": 1.3435757202060242e-05, + "loss": 1.9492, + "step": 663 + }, + { + "epoch": 4.07, + "learning_rate": 1.3417089152732049e-05, + "loss": 1.7031, + "step": 664 + }, + { + "epoch": 4.08, + "learning_rate": 1.3398407610911752e-05, + "loss": 1.791, + "step": 665 + }, + { + "epoch": 4.09, + "learning_rate": 1.3379712650364061e-05, + "loss": 1.8066, + "step": 666 + }, + { + "epoch": 4.09, + "learning_rate": 1.3361004344906652e-05, + "loss": 1.6992, + "step": 667 + }, + { + "epoch": 4.1, + "learning_rate": 1.3342282768409904e-05, + "loss": 1.8965, + "step": 668 + }, + { + "epoch": 4.1, + "learning_rate": 1.3323547994796597e-05, + "loss": 1.7832, + "step": 669 + }, + { + "epoch": 4.11, + "learning_rate": 1.330480009804162e-05, + "loss": 1.8633, + "step": 670 + }, + { + "epoch": 4.12, + "learning_rate": 1.3286039152171667e-05, + "loss": 1.6055, + "step": 671 + }, + { + "epoch": 4.12, + "learning_rate": 1.3267265231264982e-05, + "loss": 1.8164, + "step": 672 + }, + { + "epoch": 4.13, + "learning_rate": 1.3248478409451017e-05, + "loss": 1.9805, + "step": 673 + }, + { + "epoch": 4.13, + "learning_rate": 1.3229678760910174e-05, + "loss": 1.666, + "step": 674 + }, + { + "epoch": 4.14, + "learning_rate": 1.3210866359873506e-05, + "loss": 1.8867, + "step": 675 + }, + { + "epoch": 4.15, + "learning_rate": 1.3192041280622409e-05, + "loss": 1.9473, + "step": 676 + }, + { + "epoch": 4.15, + "learning_rate": 1.3173203597488348e-05, + "loss": 1.9375, + "step": 677 + }, + { + "epoch": 4.16, + "learning_rate": 1.3154353384852559e-05, + "loss": 1.8145, + "step": 678 + }, + { + "epoch": 4.17, + "learning_rate": 1.3135490717145726e-05, + "loss": 1.7539, + "step": 679 + }, + { + "epoch": 4.17, + "learning_rate": 1.3116615668847749e-05, + "loss": 1.7734, + "step": 680 + }, + { + "epoch": 4.18, + "learning_rate": 1.3097728314487385e-05, + "loss": 1.7656, + "step": 681 + }, + { + "epoch": 4.18, + "learning_rate": 1.3078828728641994e-05, + "loss": 1.8672, + "step": 682 + }, + { + "epoch": 4.19, + "learning_rate": 1.305991698593723e-05, + "loss": 1.7656, + "step": 683 + }, + { + "epoch": 4.2, + "learning_rate": 1.3040993161046749e-05, + "loss": 1.8789, + "step": 684 + }, + { + "epoch": 4.2, + "learning_rate": 1.3022057328691915e-05, + "loss": 1.627, + "step": 685 + }, + { + "epoch": 4.21, + "learning_rate": 1.3003109563641499e-05, + "loss": 1.7695, + "step": 686 + }, + { + "epoch": 4.21, + "learning_rate": 1.298414994071139e-05, + "loss": 1.709, + "step": 687 + }, + { + "epoch": 4.22, + "learning_rate": 1.2965178534764311e-05, + "loss": 1.7383, + "step": 688 + }, + { + "epoch": 4.23, + "learning_rate": 1.294619542070949e-05, + "loss": 1.6523, + "step": 689 + }, + { + "epoch": 4.23, + "learning_rate": 1.2927200673502399e-05, + "loss": 1.8145, + "step": 690 + }, + { + "epoch": 4.24, + "learning_rate": 1.2908194368144437e-05, + "loss": 1.7949, + "step": 691 + }, + { + "epoch": 4.25, + "learning_rate": 1.288917657968265e-05, + "loss": 1.7422, + "step": 692 + }, + { + "epoch": 4.25, + "learning_rate": 1.287014738320941e-05, + "loss": 1.9102, + "step": 693 + }, + { + "epoch": 4.26, + "learning_rate": 1.285110685386215e-05, + "loss": 1.6523, + "step": 694 + }, + { + "epoch": 4.26, + "learning_rate": 1.283205506682304e-05, + "loss": 1.5938, + "step": 695 + }, + { + "epoch": 4.27, + "learning_rate": 1.2812992097318711e-05, + "loss": 1.6797, + "step": 696 + }, + { + "epoch": 4.28, + "learning_rate": 1.2793918020619937e-05, + "loss": 1.8164, + "step": 697 + }, + { + "epoch": 4.28, + "learning_rate": 1.2774832912041356e-05, + "loss": 1.6328, + "step": 698 + }, + { + "epoch": 4.29, + "learning_rate": 1.2755736846941167e-05, + "loss": 1.9219, + "step": 699 + }, + { + "epoch": 4.29, + "learning_rate": 1.2736629900720832e-05, + "loss": 1.8496, + "step": 700 + }, + { + "epoch": 4.3, + "learning_rate": 1.2717512148824764e-05, + "loss": 1.7031, + "step": 701 + }, + { + "epoch": 4.31, + "learning_rate": 1.2698383666740064e-05, + "loss": 1.7266, + "step": 702 + }, + { + "epoch": 4.31, + "learning_rate": 1.2679244529996182e-05, + "loss": 1.9102, + "step": 703 + }, + { + "epoch": 4.32, + "learning_rate": 1.2660094814164653e-05, + "loss": 1.6855, + "step": 704 + }, + { + "epoch": 4.33, + "learning_rate": 1.2640934594858773e-05, + "loss": 1.6641, + "step": 705 + }, + { + "epoch": 4.33, + "learning_rate": 1.262176394773332e-05, + "loss": 1.8672, + "step": 706 + }, + { + "epoch": 4.34, + "learning_rate": 1.2602582948484243e-05, + "loss": 1.7383, + "step": 707 + }, + { + "epoch": 4.34, + "learning_rate": 1.2583391672848361e-05, + "loss": 2.0586, + "step": 708 + }, + { + "epoch": 4.35, + "learning_rate": 1.256419019660308e-05, + "loss": 1.8281, + "step": 709 + }, + { + "epoch": 4.36, + "learning_rate": 1.2544978595566078e-05, + "loss": 1.7207, + "step": 710 + }, + { + "epoch": 4.36, + "learning_rate": 1.2525756945595006e-05, + "loss": 1.6328, + "step": 711 + }, + { + "epoch": 4.37, + "learning_rate": 1.2506525322587207e-05, + "loss": 1.8379, + "step": 712 + }, + { + "epoch": 4.37, + "learning_rate": 1.2487283802479389e-05, + "loss": 1.8828, + "step": 713 + }, + { + "epoch": 4.38, + "learning_rate": 1.246803246124735e-05, + "loss": 1.916, + "step": 714 + }, + { + "epoch": 4.39, + "learning_rate": 1.2448771374905655e-05, + "loss": 1.7852, + "step": 715 + }, + { + "epoch": 4.39, + "learning_rate": 1.2429500619507362e-05, + "loss": 2.0391, + "step": 716 + }, + { + "epoch": 4.4, + "learning_rate": 1.2410220271143693e-05, + "loss": 1.7422, + "step": 717 + }, + { + "epoch": 4.4, + "learning_rate": 1.2390930405943766e-05, + "loss": 1.8672, + "step": 718 + }, + { + "epoch": 4.41, + "learning_rate": 1.237163110007426e-05, + "loss": 1.8457, + "step": 719 + }, + { + "epoch": 4.42, + "learning_rate": 1.2352322429739134e-05, + "loss": 1.7402, + "step": 720 + }, + { + "epoch": 4.42, + "learning_rate": 1.233300447117933e-05, + "loss": 1.6465, + "step": 721 + }, + { + "epoch": 4.43, + "learning_rate": 1.2313677300672463e-05, + "loss": 1.6777, + "step": 722 + }, + { + "epoch": 4.44, + "learning_rate": 1.2294340994532511e-05, + "loss": 1.7656, + "step": 723 + }, + { + "epoch": 4.44, + "learning_rate": 1.2274995629109545e-05, + "loss": 1.8066, + "step": 724 + }, + { + "epoch": 4.45, + "learning_rate": 1.2255641280789385e-05, + "loss": 1.8809, + "step": 725 + }, + { + "epoch": 4.45, + "learning_rate": 1.2236278025993334e-05, + "loss": 1.8223, + "step": 726 + }, + { + "epoch": 4.46, + "learning_rate": 1.2216905941177854e-05, + "loss": 1.7656, + "step": 727 + }, + { + "epoch": 4.47, + "learning_rate": 1.2197525102834284e-05, + "loss": 1.8066, + "step": 728 + }, + { + "epoch": 4.47, + "learning_rate": 1.2178135587488515e-05, + "loss": 1.7207, + "step": 729 + }, + { + "epoch": 4.48, + "learning_rate": 1.215873747170071e-05, + "loss": 1.8535, + "step": 730 + }, + { + "epoch": 4.48, + "learning_rate": 1.2139330832064975e-05, + "loss": 1.7949, + "step": 731 + }, + { + "epoch": 4.49, + "learning_rate": 1.2119915745209092e-05, + "loss": 1.8926, + "step": 732 + }, + { + "epoch": 4.5, + "learning_rate": 1.2100492287794186e-05, + "loss": 1.6777, + "step": 733 + }, + { + "epoch": 4.5, + "learning_rate": 1.2081060536514432e-05, + "loss": 1.7773, + "step": 734 + }, + { + "epoch": 4.51, + "learning_rate": 1.206162056809676e-05, + "loss": 1.6699, + "step": 735 + }, + { + "epoch": 4.52, + "learning_rate": 1.2042172459300546e-05, + "loss": 1.709, + "step": 736 + }, + { + "epoch": 4.52, + "learning_rate": 1.2022716286917298e-05, + "loss": 1.8887, + "step": 737 + }, + { + "epoch": 4.53, + "learning_rate": 1.2003252127770378e-05, + "loss": 1.9219, + "step": 738 + }, + { + "epoch": 4.53, + "learning_rate": 1.198378005871467e-05, + "loss": 1.8535, + "step": 739 + }, + { + "epoch": 4.54, + "learning_rate": 1.1964300156636304e-05, + "loss": 1.7051, + "step": 740 + }, + { + "epoch": 4.55, + "learning_rate": 1.1944812498452329e-05, + "loss": 1.7578, + "step": 741 + }, + { + "epoch": 4.55, + "learning_rate": 1.192531716111042e-05, + "loss": 1.8203, + "step": 742 + }, + { + "epoch": 4.56, + "learning_rate": 1.1905814221588581e-05, + "loss": 1.6016, + "step": 743 + }, + { + "epoch": 4.56, + "learning_rate": 1.1886303756894828e-05, + "loss": 1.543, + "step": 744 + }, + { + "epoch": 4.57, + "learning_rate": 1.1866785844066884e-05, + "loss": 1.8145, + "step": 745 + }, + { + "epoch": 4.58, + "learning_rate": 1.1847260560171895e-05, + "loss": 1.6719, + "step": 746 + }, + { + "epoch": 4.58, + "learning_rate": 1.18277279823061e-05, + "loss": 1.6953, + "step": 747 + }, + { + "epoch": 4.59, + "learning_rate": 1.1808188187594549e-05, + "loss": 1.6406, + "step": 748 + }, + { + "epoch": 4.6, + "learning_rate": 1.1788641253190779e-05, + "loss": 1.7246, + "step": 749 + }, + { + "epoch": 4.6, + "learning_rate": 1.176908725627652e-05, + "loss": 1.6992, + "step": 750 + }, + { + "epoch": 4.61, + "learning_rate": 1.1749526274061394e-05, + "loss": 1.916, + "step": 751 + }, + { + "epoch": 4.61, + "learning_rate": 1.1729958383782598e-05, + "loss": 1.6543, + "step": 752 + }, + { + "epoch": 4.62, + "learning_rate": 1.1710383662704608e-05, + "loss": 1.707, + "step": 753 + }, + { + "epoch": 4.63, + "learning_rate": 1.1690802188118878e-05, + "loss": 1.6953, + "step": 754 + }, + { + "epoch": 4.63, + "learning_rate": 1.1671214037343515e-05, + "loss": 1.6875, + "step": 755 + }, + { + "epoch": 4.64, + "learning_rate": 1.1651619287723e-05, + "loss": 1.7969, + "step": 756 + }, + { + "epoch": 4.64, + "learning_rate": 1.1632018016627859e-05, + "loss": 1.7461, + "step": 757 + }, + { + "epoch": 4.65, + "learning_rate": 1.1612410301454384e-05, + "loss": 1.8887, + "step": 758 + }, + { + "epoch": 4.66, + "learning_rate": 1.1592796219624292e-05, + "loss": 1.9414, + "step": 759 + }, + { + "epoch": 4.66, + "learning_rate": 1.1573175848584455e-05, + "loss": 1.8711, + "step": 760 + }, + { + "epoch": 4.67, + "learning_rate": 1.1553549265806567e-05, + "loss": 1.7246, + "step": 761 + }, + { + "epoch": 4.67, + "learning_rate": 1.1533916548786856e-05, + "loss": 1.8496, + "step": 762 + }, + { + "epoch": 4.68, + "learning_rate": 1.1514277775045768e-05, + "loss": 1.918, + "step": 763 + }, + { + "epoch": 4.69, + "learning_rate": 1.1494633022127669e-05, + "loss": 1.8574, + "step": 764 + }, + { + "epoch": 4.69, + "learning_rate": 1.1474982367600524e-05, + "loss": 1.668, + "step": 765 + }, + { + "epoch": 4.7, + "learning_rate": 1.1455325889055616e-05, + "loss": 1.7031, + "step": 766 + }, + { + "epoch": 4.71, + "learning_rate": 1.1435663664107204e-05, + "loss": 1.7754, + "step": 767 + }, + { + "epoch": 4.71, + "learning_rate": 1.141599577039226e-05, + "loss": 1.7129, + "step": 768 + }, + { + "epoch": 4.72, + "learning_rate": 1.1396322285570119e-05, + "loss": 1.6582, + "step": 769 + }, + { + "epoch": 4.72, + "learning_rate": 1.1376643287322202e-05, + "loss": 1.8672, + "step": 770 + }, + { + "epoch": 4.73, + "learning_rate": 1.1356958853351705e-05, + "loss": 1.8867, + "step": 771 + }, + { + "epoch": 4.74, + "learning_rate": 1.1337269061383278e-05, + "loss": 1.8359, + "step": 772 + }, + { + "epoch": 4.74, + "learning_rate": 1.1317573989162727e-05, + "loss": 1.8535, + "step": 773 + }, + { + "epoch": 4.75, + "learning_rate": 1.129787371445672e-05, + "loss": 1.7793, + "step": 774 + }, + { + "epoch": 4.75, + "learning_rate": 1.1278168315052445e-05, + "loss": 1.834, + "step": 775 + }, + { + "epoch": 4.76, + "learning_rate": 1.1258457868757352e-05, + "loss": 1.8906, + "step": 776 + }, + { + "epoch": 4.77, + "learning_rate": 1.1238742453398794e-05, + "loss": 1.9512, + "step": 777 + }, + { + "epoch": 4.77, + "learning_rate": 1.1219022146823762e-05, + "loss": 1.8047, + "step": 778 + }, + { + "epoch": 4.78, + "learning_rate": 1.1199297026898547e-05, + "loss": 1.627, + "step": 779 + }, + { + "epoch": 4.79, + "learning_rate": 1.1179567171508463e-05, + "loss": 1.8242, + "step": 780 + }, + { + "epoch": 4.79, + "learning_rate": 1.1159832658557498e-05, + "loss": 1.7129, + "step": 781 + }, + { + "epoch": 4.8, + "learning_rate": 1.1140093565968055e-05, + "loss": 1.7012, + "step": 782 + }, + { + "epoch": 4.8, + "learning_rate": 1.1120349971680605e-05, + "loss": 1.8145, + "step": 783 + }, + { + "epoch": 4.81, + "learning_rate": 1.1100601953653393e-05, + "loss": 1.6426, + "step": 784 + }, + { + "epoch": 4.82, + "learning_rate": 1.1080849589862142e-05, + "loss": 1.8574, + "step": 785 + }, + { + "epoch": 4.82, + "learning_rate": 1.1061092958299727e-05, + "loss": 1.752, + "step": 786 + }, + { + "epoch": 4.83, + "learning_rate": 1.1041332136975874e-05, + "loss": 1.9531, + "step": 787 + }, + { + "epoch": 4.83, + "learning_rate": 1.1021567203916861e-05, + "loss": 1.7676, + "step": 788 + }, + { + "epoch": 4.84, + "learning_rate": 1.1001798237165185e-05, + "loss": 1.7656, + "step": 789 + }, + { + "epoch": 4.85, + "learning_rate": 1.0982025314779287e-05, + "loss": 1.9512, + "step": 790 + }, + { + "epoch": 4.85, + "learning_rate": 1.0962248514833218e-05, + "loss": 1.791, + "step": 791 + }, + { + "epoch": 4.86, + "learning_rate": 1.0942467915416342e-05, + "loss": 1.8398, + "step": 792 + }, + { + "epoch": 4.87, + "learning_rate": 1.092268359463302e-05, + "loss": 1.6797, + "step": 793 + }, + { + "epoch": 4.87, + "learning_rate": 1.090289563060232e-05, + "loss": 1.7871, + "step": 794 + }, + { + "epoch": 4.88, + "learning_rate": 1.088310410145768e-05, + "loss": 1.6738, + "step": 795 + }, + { + "epoch": 4.88, + "learning_rate": 1.086330908534663e-05, + "loss": 1.8711, + "step": 796 + }, + { + "epoch": 4.89, + "learning_rate": 1.0843510660430447e-05, + "loss": 1.752, + "step": 797 + }, + { + "epoch": 4.9, + "learning_rate": 1.0823708904883898e-05, + "loss": 1.9297, + "step": 798 + }, + { + "epoch": 4.9, + "learning_rate": 1.0803903896894877e-05, + "loss": 1.9141, + "step": 799 + }, + { + "epoch": 4.91, + "learning_rate": 1.0784095714664124e-05, + "loss": 1.7188, + "step": 800 + }, + { + "epoch": 4.91, + "learning_rate": 1.0764284436404924e-05, + "loss": 1.7441, + "step": 801 + }, + { + "epoch": 4.92, + "learning_rate": 1.0744470140342775e-05, + "loss": 1.7266, + "step": 802 + }, + { + "epoch": 4.93, + "learning_rate": 1.0724652904715091e-05, + "loss": 1.832, + "step": 803 + }, + { + "epoch": 4.93, + "learning_rate": 1.0704832807770909e-05, + "loss": 1.6152, + "step": 804 + }, + { + "epoch": 4.94, + "learning_rate": 1.0685009927770542e-05, + "loss": 1.8281, + "step": 805 + }, + { + "epoch": 4.94, + "learning_rate": 1.0665184342985306e-05, + "loss": 1.7812, + "step": 806 + }, + { + "epoch": 4.95, + "learning_rate": 1.064535613169719e-05, + "loss": 1.875, + "step": 807 + }, + { + "epoch": 4.96, + "learning_rate": 1.0625525372198564e-05, + "loss": 1.748, + "step": 808 + }, + { + "epoch": 4.96, + "learning_rate": 1.0605692142791846e-05, + "loss": 1.7148, + "step": 809 + }, + { + "epoch": 4.97, + "learning_rate": 1.0585856521789215e-05, + "loss": 1.7715, + "step": 810 + }, + { + "epoch": 4.98, + "learning_rate": 1.056601858751229e-05, + "loss": 1.7676, + "step": 811 + }, + { + "epoch": 4.98, + "learning_rate": 1.0546178418291833e-05, + "loss": 1.7852, + "step": 812 + }, + { + "epoch": 4.99, + "learning_rate": 1.0526336092467414e-05, + "loss": 1.9141, + "step": 813 + }, + { + "epoch": 4.99, + "learning_rate": 1.0506491688387128e-05, + "loss": 1.6602, + "step": 814 + }, + { + "epoch": 5.0, + "learning_rate": 1.0486645284407282e-05, + "loss": 1.75, + "step": 815 + }, + { + "epoch": 5.01, + "learning_rate": 1.0466796958892071e-05, + "loss": 1.5469, + "step": 816 + }, + { + "epoch": 5.01, + "learning_rate": 1.0446946790213275e-05, + "loss": 1.2852, + "step": 817 + }, + { + "epoch": 5.02, + "learning_rate": 1.0427094856749966e-05, + "loss": 1.3926, + "step": 818 + }, + { + "epoch": 5.02, + "learning_rate": 1.0407241236888164e-05, + "loss": 1.293, + "step": 819 + }, + { + "epoch": 5.03, + "learning_rate": 1.0387386009020569e-05, + "loss": 1.2559, + "step": 820 + }, + { + "epoch": 5.04, + "learning_rate": 1.0367529251546208e-05, + "loss": 1.3379, + "step": 821 + }, + { + "epoch": 5.04, + "learning_rate": 1.034767104287017e-05, + "loss": 1.3047, + "step": 822 + }, + { + "epoch": 5.05, + "learning_rate": 1.032781146140326e-05, + "loss": 1.3105, + "step": 823 + }, + { + "epoch": 5.06, + "learning_rate": 1.0307950585561705e-05, + "loss": 1.3203, + "step": 824 + }, + { + "epoch": 5.06, + "learning_rate": 1.0288088493766846e-05, + "loss": 1.2461, + "step": 825 + }, + { + "epoch": 5.07, + "learning_rate": 1.0268225264444829e-05, + "loss": 1.3281, + "step": 826 + }, + { + "epoch": 5.07, + "learning_rate": 1.0248360976026279e-05, + "loss": 1.1758, + "step": 827 + }, + { + "epoch": 5.08, + "learning_rate": 1.0228495706946015e-05, + "loss": 1.1465, + "step": 828 + }, + { + "epoch": 5.09, + "learning_rate": 1.0208629535642726e-05, + "loss": 1.1836, + "step": 829 + }, + { + "epoch": 5.09, + "learning_rate": 1.0188762540558657e-05, + "loss": 1.1504, + "step": 830 + }, + { + "epoch": 5.1, + "learning_rate": 1.0168894800139311e-05, + "loss": 1.1641, + "step": 831 + }, + { + "epoch": 5.1, + "learning_rate": 1.0149026392833137e-05, + "loss": 1.1504, + "step": 832 + }, + { + "epoch": 5.11, + "learning_rate": 1.0129157397091208e-05, + "loss": 1.2832, + "step": 833 + }, + { + "epoch": 5.12, + "learning_rate": 1.010928789136693e-05, + "loss": 1.25, + "step": 834 + }, + { + "epoch": 5.12, + "learning_rate": 1.0089417954115715e-05, + "loss": 1.2207, + "step": 835 + }, + { + "epoch": 5.13, + "learning_rate": 1.0069547663794682e-05, + "loss": 1.1855, + "step": 836 + }, + { + "epoch": 5.13, + "learning_rate": 1.0049677098862347e-05, + "loss": 1.1289, + "step": 837 + }, + { + "epoch": 5.14, + "learning_rate": 1.002980633777831e-05, + "loss": 1.1562, + "step": 838 + }, + { + "epoch": 5.15, + "learning_rate": 1.0009935459002935e-05, + "loss": 1.3242, + "step": 839 + }, + { + "epoch": 5.15, + "learning_rate": 9.990064540997066e-06, + "loss": 1.3105, + "step": 840 + }, + { + "epoch": 5.16, + "learning_rate": 9.970193662221694e-06, + "loss": 1.3145, + "step": 841 + }, + { + "epoch": 5.17, + "learning_rate": 9.950322901137655e-06, + "loss": 1.2441, + "step": 842 + }, + { + "epoch": 5.17, + "learning_rate": 9.93045233620532e-06, + "loss": 1.3262, + "step": 843 + }, + { + "epoch": 5.18, + "learning_rate": 9.910582045884292e-06, + "loss": 1.2656, + "step": 844 + }, + { + "epoch": 5.18, + "learning_rate": 9.890712108633076e-06, + "loss": 1.3633, + "step": 845 + }, + { + "epoch": 5.19, + "learning_rate": 9.870842602908794e-06, + "loss": 1.2734, + "step": 846 + }, + { + "epoch": 5.2, + "learning_rate": 9.850973607166865e-06, + "loss": 1.2656, + "step": 847 + }, + { + "epoch": 5.2, + "learning_rate": 9.83110519986069e-06, + "loss": 1.2949, + "step": 848 + }, + { + "epoch": 5.21, + "learning_rate": 9.811237459441346e-06, + "loss": 1.2227, + "step": 849 + }, + { + "epoch": 5.21, + "learning_rate": 9.791370464357279e-06, + "loss": 1.2793, + "step": 850 + }, + { + "epoch": 5.22, + "learning_rate": 9.771504293053985e-06, + "loss": 1.3633, + "step": 851 + }, + { + "epoch": 5.23, + "learning_rate": 9.751639023973724e-06, + "loss": 1.207, + "step": 852 + }, + { + "epoch": 5.23, + "learning_rate": 9.731774735555174e-06, + "loss": 1.252, + "step": 853 + }, + { + "epoch": 5.24, + "learning_rate": 9.711911506233157e-06, + "loss": 1.1992, + "step": 854 + }, + { + "epoch": 5.25, + "learning_rate": 9.692049414438298e-06, + "loss": 1.3516, + "step": 855 + }, + { + "epoch": 5.25, + "learning_rate": 9.672188538596746e-06, + "loss": 1.3574, + "step": 856 + }, + { + "epoch": 5.26, + "learning_rate": 9.652328957129831e-06, + "loss": 1.4062, + "step": 857 + }, + { + "epoch": 5.26, + "learning_rate": 9.632470748453794e-06, + "loss": 1.3223, + "step": 858 + }, + { + "epoch": 5.27, + "learning_rate": 9.612613990979436e-06, + "loss": 1.2207, + "step": 859 + }, + { + "epoch": 5.28, + "learning_rate": 9.59275876311184e-06, + "loss": 1.2441, + "step": 860 + }, + { + "epoch": 5.28, + "learning_rate": 9.572905143250039e-06, + "loss": 1.0586, + "step": 861 + }, + { + "epoch": 5.29, + "learning_rate": 9.553053209786725e-06, + "loss": 1.2148, + "step": 862 + }, + { + "epoch": 5.29, + "learning_rate": 9.53320304110793e-06, + "loss": 1.2402, + "step": 863 + }, + { + "epoch": 5.3, + "learning_rate": 9.513354715592721e-06, + "loss": 1.1338, + "step": 864 + }, + { + "epoch": 5.31, + "learning_rate": 9.493508311612874e-06, + "loss": 1.332, + "step": 865 + }, + { + "epoch": 5.31, + "learning_rate": 9.473663907532593e-06, + "loss": 1.2715, + "step": 866 + }, + { + "epoch": 5.32, + "learning_rate": 9.453821581708174e-06, + "loss": 1.2793, + "step": 867 + }, + { + "epoch": 5.33, + "learning_rate": 9.433981412487711e-06, + "loss": 1.2969, + "step": 868 + }, + { + "epoch": 5.33, + "learning_rate": 9.414143478210786e-06, + "loss": 1.1074, + "step": 869 + }, + { + "epoch": 5.34, + "learning_rate": 9.394307857208158e-06, + "loss": 1.1924, + "step": 870 + }, + { + "epoch": 5.34, + "learning_rate": 9.374474627801439e-06, + "loss": 1.2188, + "step": 871 + }, + { + "epoch": 5.35, + "learning_rate": 9.354643868302813e-06, + "loss": 1.2246, + "step": 872 + }, + { + "epoch": 5.36, + "learning_rate": 9.334815657014696e-06, + "loss": 1.2109, + "step": 873 + }, + { + "epoch": 5.36, + "learning_rate": 9.314990072229461e-06, + "loss": 1.2832, + "step": 874 + }, + { + "epoch": 5.37, + "learning_rate": 9.295167192229093e-06, + "loss": 1.2666, + "step": 875 + }, + { + "epoch": 5.37, + "learning_rate": 9.27534709528491e-06, + "loss": 1.3066, + "step": 876 + }, + { + "epoch": 5.38, + "learning_rate": 9.25552985965723e-06, + "loss": 1.5352, + "step": 877 + }, + { + "epoch": 5.39, + "learning_rate": 9.235715563595082e-06, + "loss": 1.2305, + "step": 878 + }, + { + "epoch": 5.39, + "learning_rate": 9.215904285335876e-06, + "loss": 1.1113, + "step": 879 + }, + { + "epoch": 5.4, + "learning_rate": 9.196096103105127e-06, + "loss": 1.2285, + "step": 880 + }, + { + "epoch": 5.4, + "learning_rate": 9.176291095116104e-06, + "loss": 1.2871, + "step": 881 + }, + { + "epoch": 5.41, + "learning_rate": 9.156489339569555e-06, + "loss": 1.2539, + "step": 882 + }, + { + "epoch": 5.42, + "learning_rate": 9.136690914653377e-06, + "loss": 1.2666, + "step": 883 + }, + { + "epoch": 5.42, + "learning_rate": 9.11689589854232e-06, + "loss": 1.2539, + "step": 884 + }, + { + "epoch": 5.43, + "learning_rate": 9.097104369397681e-06, + "loss": 1.1562, + "step": 885 + }, + { + "epoch": 5.44, + "learning_rate": 9.07731640536698e-06, + "loss": 1.2148, + "step": 886 + }, + { + "epoch": 5.44, + "learning_rate": 9.057532084583662e-06, + "loss": 1.3848, + "step": 887 + }, + { + "epoch": 5.45, + "learning_rate": 9.037751485166785e-06, + "loss": 1.2832, + "step": 888 + }, + { + "epoch": 5.45, + "learning_rate": 9.017974685220716e-06, + "loss": 1.2832, + "step": 889 + }, + { + "epoch": 5.46, + "learning_rate": 8.998201762834815e-06, + "loss": 1.3906, + "step": 890 + }, + { + "epoch": 5.47, + "learning_rate": 8.97843279608314e-06, + "loss": 1.2539, + "step": 891 + }, + { + "epoch": 5.47, + "learning_rate": 8.958667863024127e-06, + "loss": 1.168, + "step": 892 + }, + { + "epoch": 5.48, + "learning_rate": 8.938907041700275e-06, + "loss": 1.3086, + "step": 893 + }, + { + "epoch": 5.48, + "learning_rate": 8.919150410137862e-06, + "loss": 1.2656, + "step": 894 + }, + { + "epoch": 5.49, + "learning_rate": 8.899398046346608e-06, + "loss": 1.209, + "step": 895 + }, + { + "epoch": 5.5, + "learning_rate": 8.8796500283194e-06, + "loss": 1.2852, + "step": 896 + }, + { + "epoch": 5.5, + "learning_rate": 8.859906434031947e-06, + "loss": 1.1504, + "step": 897 + }, + { + "epoch": 5.51, + "learning_rate": 8.840167341442505e-06, + "loss": 1.0957, + "step": 898 + }, + { + "epoch": 5.52, + "learning_rate": 8.820432828491542e-06, + "loss": 1.2148, + "step": 899 + }, + { + "epoch": 5.52, + "learning_rate": 8.800702973101454e-06, + "loss": 1.2832, + "step": 900 + }, + { + "epoch": 5.53, + "learning_rate": 8.78097785317624e-06, + "loss": 1.252, + "step": 901 + }, + { + "epoch": 5.53, + "learning_rate": 8.761257546601209e-06, + "loss": 1.3633, + "step": 902 + }, + { + "epoch": 5.54, + "learning_rate": 8.741542131242652e-06, + "loss": 1.2246, + "step": 903 + }, + { + "epoch": 5.55, + "learning_rate": 8.721831684947557e-06, + "loss": 1.2148, + "step": 904 + }, + { + "epoch": 5.55, + "learning_rate": 8.702126285543286e-06, + "loss": 1.127, + "step": 905 + }, + { + "epoch": 5.56, + "learning_rate": 8.682426010837274e-06, + "loss": 1.25, + "step": 906 + }, + { + "epoch": 5.56, + "learning_rate": 8.662730938616724e-06, + "loss": 1.2031, + "step": 907 + }, + { + "epoch": 5.57, + "learning_rate": 8.643041146648299e-06, + "loss": 1.2246, + "step": 908 + }, + { + "epoch": 5.58, + "learning_rate": 8.6233567126778e-06, + "loss": 1.3438, + "step": 909 + }, + { + "epoch": 5.58, + "learning_rate": 8.603677714429888e-06, + "loss": 1.2852, + "step": 910 + }, + { + "epoch": 5.59, + "learning_rate": 8.584004229607747e-06, + "loss": 1.418, + "step": 911 + }, + { + "epoch": 5.6, + "learning_rate": 8.564336335892798e-06, + "loss": 1.3105, + "step": 912 + }, + { + "epoch": 5.6, + "learning_rate": 8.54467411094439e-06, + "loss": 1.2422, + "step": 913 + }, + { + "epoch": 5.61, + "learning_rate": 8.52501763239948e-06, + "loss": 1.2373, + "step": 914 + }, + { + "epoch": 5.61, + "learning_rate": 8.505366977872336e-06, + "loss": 1.2637, + "step": 915 + }, + { + "epoch": 5.62, + "learning_rate": 8.485722224954237e-06, + "loss": 1.3906, + "step": 916 + }, + { + "epoch": 5.63, + "learning_rate": 8.466083451213145e-06, + "loss": 1.1748, + "step": 917 + }, + { + "epoch": 5.63, + "learning_rate": 8.446450734193437e-06, + "loss": 1.2949, + "step": 918 + }, + { + "epoch": 5.64, + "learning_rate": 8.426824151415548e-06, + "loss": 1.125, + "step": 919 + }, + { + "epoch": 5.64, + "learning_rate": 8.407203780375711e-06, + "loss": 1.2539, + "step": 920 + }, + { + "epoch": 5.65, + "learning_rate": 8.38758969854562e-06, + "loss": 1.2305, + "step": 921 + }, + { + "epoch": 5.66, + "learning_rate": 8.367981983372143e-06, + "loss": 1.1523, + "step": 922 + }, + { + "epoch": 5.66, + "learning_rate": 8.348380712277002e-06, + "loss": 1.2285, + "step": 923 + }, + { + "epoch": 5.67, + "learning_rate": 8.32878596265649e-06, + "loss": 1.3281, + "step": 924 + }, + { + "epoch": 5.67, + "learning_rate": 8.309197811881128e-06, + "loss": 1.3379, + "step": 925 + }, + { + "epoch": 5.68, + "learning_rate": 8.289616337295396e-06, + "loss": 1.2891, + "step": 926 + }, + { + "epoch": 5.69, + "learning_rate": 8.270041616217407e-06, + "loss": 1.2441, + "step": 927 + }, + { + "epoch": 5.69, + "learning_rate": 8.250473725938608e-06, + "loss": 1.3652, + "step": 928 + }, + { + "epoch": 5.7, + "learning_rate": 8.23091274372348e-06, + "loss": 1.1523, + "step": 929 + }, + { + "epoch": 5.71, + "learning_rate": 8.211358746809225e-06, + "loss": 1.2637, + "step": 930 + }, + { + "epoch": 5.71, + "learning_rate": 8.191811812405453e-06, + "loss": 1.3184, + "step": 931 + }, + { + "epoch": 5.72, + "learning_rate": 8.172272017693903e-06, + "loss": 1.2676, + "step": 932 + }, + { + "epoch": 5.72, + "learning_rate": 8.15273943982811e-06, + "loss": 1.1836, + "step": 933 + }, + { + "epoch": 5.73, + "learning_rate": 8.133214155933118e-06, + "loss": 1.1533, + "step": 934 + }, + { + "epoch": 5.74, + "learning_rate": 8.113696243105175e-06, + "loss": 1.1562, + "step": 935 + }, + { + "epoch": 5.74, + "learning_rate": 8.09418577841142e-06, + "loss": 1.3008, + "step": 936 + }, + { + "epoch": 5.75, + "learning_rate": 8.074682838889581e-06, + "loss": 1.3379, + "step": 937 + }, + { + "epoch": 5.75, + "learning_rate": 8.055187501547674e-06, + "loss": 1.2012, + "step": 938 + }, + { + "epoch": 5.76, + "learning_rate": 8.035699843363696e-06, + "loss": 1.1484, + "step": 939 + }, + { + "epoch": 5.77, + "learning_rate": 8.01621994128533e-06, + "loss": 1.293, + "step": 940 + }, + { + "epoch": 5.77, + "learning_rate": 7.996747872229624e-06, + "loss": 1.3223, + "step": 941 + }, + { + "epoch": 5.78, + "learning_rate": 7.977283713082706e-06, + "loss": 1.3105, + "step": 942 + }, + { + "epoch": 5.79, + "learning_rate": 7.95782754069946e-06, + "loss": 1.207, + "step": 943 + }, + { + "epoch": 5.79, + "learning_rate": 7.938379431903243e-06, + "loss": 1.1992, + "step": 944 + }, + { + "epoch": 5.8, + "learning_rate": 7.91893946348557e-06, + "loss": 1.1582, + "step": 945 + }, + { + "epoch": 5.8, + "learning_rate": 7.899507712205818e-06, + "loss": 1.168, + "step": 946 + }, + { + "epoch": 5.81, + "learning_rate": 7.880084254790911e-06, + "loss": 1.3105, + "step": 947 + }, + { + "epoch": 5.82, + "learning_rate": 7.860669167935028e-06, + "loss": 1.2988, + "step": 948 + }, + { + "epoch": 5.82, + "learning_rate": 7.841262528299296e-06, + "loss": 1.1211, + "step": 949 + }, + { + "epoch": 5.83, + "learning_rate": 7.821864412511485e-06, + "loss": 1.2832, + "step": 950 + }, + { + "epoch": 5.83, + "learning_rate": 7.802474897165716e-06, + "loss": 1.0977, + "step": 951 + }, + { + "epoch": 5.84, + "learning_rate": 7.783094058822147e-06, + "loss": 1.0918, + "step": 952 + }, + { + "epoch": 5.85, + "learning_rate": 7.76372197400667e-06, + "loss": 1.2617, + "step": 953 + }, + { + "epoch": 5.85, + "learning_rate": 7.74435871921062e-06, + "loss": 1.2793, + "step": 954 + }, + { + "epoch": 5.86, + "learning_rate": 7.72500437089046e-06, + "loss": 1.2402, + "step": 955 + }, + { + "epoch": 5.87, + "learning_rate": 7.705659005467489e-06, + "loss": 1.2344, + "step": 956 + }, + { + "epoch": 5.87, + "learning_rate": 7.68632269932754e-06, + "loss": 1.2832, + "step": 957 + }, + { + "epoch": 5.88, + "learning_rate": 7.666995528820673e-06, + "loss": 1.2402, + "step": 958 + }, + { + "epoch": 5.88, + "learning_rate": 7.647677570260868e-06, + "loss": 1.3262, + "step": 959 + }, + { + "epoch": 5.89, + "learning_rate": 7.628368899925744e-06, + "loss": 1.2695, + "step": 960 + }, + { + "epoch": 5.9, + "learning_rate": 7.609069594056234e-06, + "loss": 1.2031, + "step": 961 + }, + { + "epoch": 5.9, + "learning_rate": 7.589779728856307e-06, + "loss": 1.1484, + "step": 962 + }, + { + "epoch": 5.91, + "learning_rate": 7.570499380492641e-06, + "loss": 1.3203, + "step": 963 + }, + { + "epoch": 5.91, + "learning_rate": 7.551228625094349e-06, + "loss": 1.2754, + "step": 964 + }, + { + "epoch": 5.92, + "learning_rate": 7.5319675387526555e-06, + "loss": 1.2559, + "step": 965 + }, + { + "epoch": 5.93, + "learning_rate": 7.512716197520614e-06, + "loss": 1.209, + "step": 966 + }, + { + "epoch": 5.93, + "learning_rate": 7.493474677412795e-06, + "loss": 1.1875, + "step": 967 + }, + { + "epoch": 5.94, + "learning_rate": 7.4742430544049945e-06, + "loss": 1.2168, + "step": 968 + }, + { + "epoch": 5.94, + "learning_rate": 7.4550214044339256e-06, + "loss": 1.209, + "step": 969 + }, + { + "epoch": 5.95, + "learning_rate": 7.435809803396923e-06, + "loss": 1.25, + "step": 970 + }, + { + "epoch": 5.96, + "learning_rate": 7.416608327151642e-06, + "loss": 1.1211, + "step": 971 + }, + { + "epoch": 5.96, + "learning_rate": 7.397417051515758e-06, + "loss": 1.1113, + "step": 972 + }, + { + "epoch": 5.97, + "learning_rate": 7.37823605226668e-06, + "loss": 1.2422, + "step": 973 + }, + { + "epoch": 5.98, + "learning_rate": 7.359065405141228e-06, + "loss": 1.2363, + "step": 974 + }, + { + "epoch": 5.98, + "learning_rate": 7.33990518583535e-06, + "loss": 1.1338, + "step": 975 + }, + { + "epoch": 5.99, + "learning_rate": 7.320755470003822e-06, + "loss": 1.0918, + "step": 976 + }, + { + "epoch": 5.99, + "learning_rate": 7.301616333259942e-06, + "loss": 1.3027, + "step": 977 + }, + { + "epoch": 6.0, + "learning_rate": 7.282487851175237e-06, + "loss": 1.0625, + "step": 978 + }, + { + "epoch": 6.01, + "learning_rate": 7.263370099279173e-06, + "loss": 0.792, + "step": 979 + }, + { + "epoch": 6.01, + "learning_rate": 7.244263153058835e-06, + "loss": 0.9102, + "step": 980 + }, + { + "epoch": 6.02, + "learning_rate": 7.225167087958647e-06, + "loss": 0.832, + "step": 981 + }, + { + "epoch": 6.02, + "learning_rate": 7.2060819793800665e-06, + "loss": 0.8662, + "step": 982 + }, + { + "epoch": 6.03, + "learning_rate": 7.187007902681289e-06, + "loss": 0.8164, + "step": 983 + }, + { + "epoch": 6.04, + "learning_rate": 7.16794493317696e-06, + "loss": 0.8496, + "step": 984 + }, + { + "epoch": 6.04, + "learning_rate": 7.148893146137852e-06, + "loss": 0.9854, + "step": 985 + }, + { + "epoch": 6.05, + "learning_rate": 7.129852616790594e-06, + "loss": 0.8486, + "step": 986 + }, + { + "epoch": 6.06, + "learning_rate": 7.110823420317356e-06, + "loss": 0.8359, + "step": 987 + }, + { + "epoch": 6.06, + "learning_rate": 7.091805631855566e-06, + "loss": 0.7695, + "step": 988 + }, + { + "epoch": 6.07, + "learning_rate": 7.072799326497603e-06, + "loss": 0.8828, + "step": 989 + }, + { + "epoch": 6.07, + "learning_rate": 7.053804579290513e-06, + "loss": 0.9307, + "step": 990 + }, + { + "epoch": 6.08, + "learning_rate": 7.034821465235693e-06, + "loss": 0.7568, + "step": 991 + }, + { + "epoch": 6.09, + "learning_rate": 7.0158500592886115e-06, + "loss": 0.8779, + "step": 992 + }, + { + "epoch": 6.09, + "learning_rate": 6.996890436358505e-06, + "loss": 0.9648, + "step": 993 + }, + { + "epoch": 6.1, + "learning_rate": 6.977942671308087e-06, + "loss": 0.7734, + "step": 994 + }, + { + "epoch": 6.1, + "learning_rate": 6.95900683895325e-06, + "loss": 0.8066, + "step": 995 + }, + { + "epoch": 6.11, + "learning_rate": 6.9400830140627705e-06, + "loss": 0.9189, + "step": 996 + }, + { + "epoch": 6.12, + "learning_rate": 6.921171271358007e-06, + "loss": 0.8271, + "step": 997 + }, + { + "epoch": 6.12, + "learning_rate": 6.902271685512616e-06, + "loss": 0.9258, + "step": 998 + }, + { + "epoch": 6.13, + "learning_rate": 6.883384331152254e-06, + "loss": 0.9004, + "step": 999 + }, + { + "epoch": 6.13, + "learning_rate": 6.864509282854272e-06, + "loss": 0.8652, + "step": 1000 + }, + { + "epoch": 6.14, + "learning_rate": 6.845646615147445e-06, + "loss": 0.8779, + "step": 1001 + }, + { + "epoch": 6.15, + "learning_rate": 6.826796402511653e-06, + "loss": 0.8105, + "step": 1002 + }, + { + "epoch": 6.15, + "learning_rate": 6.8079587193775935e-06, + "loss": 0.9023, + "step": 1003 + }, + { + "epoch": 6.16, + "learning_rate": 6.789133640126498e-06, + "loss": 0.8877, + "step": 1004 + }, + { + "epoch": 6.17, + "learning_rate": 6.770321239089825e-06, + "loss": 0.9209, + "step": 1005 + }, + { + "epoch": 6.17, + "learning_rate": 6.751521590548986e-06, + "loss": 0.8389, + "step": 1006 + }, + { + "epoch": 6.18, + "learning_rate": 6.732734768735021e-06, + "loss": 0.8125, + "step": 1007 + }, + { + "epoch": 6.18, + "learning_rate": 6.713960847828335e-06, + "loss": 0.8408, + "step": 1008 + }, + { + "epoch": 6.19, + "learning_rate": 6.695199901958386e-06, + "loss": 0.9258, + "step": 1009 + }, + { + "epoch": 6.2, + "learning_rate": 6.6764520052034054e-06, + "loss": 0.8213, + "step": 1010 + }, + { + "epoch": 6.2, + "learning_rate": 6.657717231590095e-06, + "loss": 0.8838, + "step": 1011 + }, + { + "epoch": 6.21, + "learning_rate": 6.638995655093351e-06, + "loss": 0.667, + "step": 1012 + }, + { + "epoch": 6.21, + "learning_rate": 6.620287349635942e-06, + "loss": 0.9072, + "step": 1013 + }, + { + "epoch": 6.22, + "learning_rate": 6.601592389088251e-06, + "loss": 0.8184, + "step": 1014 + }, + { + "epoch": 6.23, + "learning_rate": 6.582910847267957e-06, + "loss": 0.9688, + "step": 1015 + }, + { + "epoch": 6.23, + "learning_rate": 6.564242797939759e-06, + "loss": 0.7861, + "step": 1016 + }, + { + "epoch": 6.24, + "learning_rate": 6.545588314815088e-06, + "loss": 0.9268, + "step": 1017 + }, + { + "epoch": 6.25, + "learning_rate": 6.526947471551799e-06, + "loss": 0.7949, + "step": 1018 + }, + { + "epoch": 6.25, + "learning_rate": 6.508320341753889e-06, + "loss": 0.8994, + "step": 1019 + }, + { + "epoch": 6.26, + "learning_rate": 6.489706998971212e-06, + "loss": 0.8193, + "step": 1020 + }, + { + "epoch": 6.26, + "learning_rate": 6.471107516699183e-06, + "loss": 0.877, + "step": 1021 + }, + { + "epoch": 6.27, + "learning_rate": 6.452521968378482e-06, + "loss": 0.8525, + "step": 1022 + }, + { + "epoch": 6.28, + "learning_rate": 6.4339504273947805e-06, + "loss": 0.8115, + "step": 1023 + }, + { + "epoch": 6.28, + "learning_rate": 6.415392967078438e-06, + "loss": 0.8262, + "step": 1024 + }, + { + "epoch": 6.29, + "learning_rate": 6.396849660704205e-06, + "loss": 0.9258, + "step": 1025 + }, + { + "epoch": 6.29, + "learning_rate": 6.378320581490962e-06, + "loss": 0.873, + "step": 1026 + }, + { + "epoch": 6.3, + "learning_rate": 6.3598058026013995e-06, + "loss": 0.9082, + "step": 1027 + }, + { + "epoch": 6.31, + "learning_rate": 6.3413053971417575e-06, + "loss": 0.9756, + "step": 1028 + }, + { + "epoch": 6.31, + "learning_rate": 6.322819438161502e-06, + "loss": 0.7363, + "step": 1029 + }, + { + "epoch": 6.32, + "learning_rate": 6.304347998653074e-06, + "loss": 0.835, + "step": 1030 + }, + { + "epoch": 6.33, + "learning_rate": 6.285891151551573e-06, + "loss": 0.8457, + "step": 1031 + }, + { + "epoch": 6.33, + "learning_rate": 6.267448969734486e-06, + "loss": 0.833, + "step": 1032 + }, + { + "epoch": 6.34, + "learning_rate": 6.24902152602139e-06, + "loss": 0.7949, + "step": 1033 + }, + { + "epoch": 6.34, + "learning_rate": 6.2306088931736766e-06, + "loss": 0.9092, + "step": 1034 + }, + { + "epoch": 6.35, + "learning_rate": 6.21221114389424e-06, + "loss": 0.8643, + "step": 1035 + }, + { + "epoch": 6.36, + "learning_rate": 6.193828350827222e-06, + "loss": 0.8809, + "step": 1036 + }, + { + "epoch": 6.36, + "learning_rate": 6.175460586557701e-06, + "loss": 0.8662, + "step": 1037 + }, + { + "epoch": 6.37, + "learning_rate": 6.157107923611412e-06, + "loss": 0.8682, + "step": 1038 + }, + { + "epoch": 6.37, + "learning_rate": 6.1387704344544684e-06, + "loss": 0.8701, + "step": 1039 + }, + { + "epoch": 6.38, + "learning_rate": 6.120448191493071e-06, + "loss": 0.791, + "step": 1040 + }, + { + "epoch": 6.39, + "learning_rate": 6.102141267073207e-06, + "loss": 0.8857, + "step": 1041 + }, + { + "epoch": 6.39, + "learning_rate": 6.083849733480394e-06, + "loss": 0.8623, + "step": 1042 + }, + { + "epoch": 6.4, + "learning_rate": 6.065573662939367e-06, + "loss": 0.8105, + "step": 1043 + }, + { + "epoch": 6.4, + "learning_rate": 6.047313127613808e-06, + "loss": 0.9443, + "step": 1044 + }, + { + "epoch": 6.41, + "learning_rate": 6.0290681996060605e-06, + "loss": 0.7783, + "step": 1045 + }, + { + "epoch": 6.42, + "learning_rate": 6.010838950956841e-06, + "loss": 0.8701, + "step": 1046 + }, + { + "epoch": 6.42, + "learning_rate": 5.992625453644953e-06, + "loss": 0.8672, + "step": 1047 + }, + { + "epoch": 6.43, + "learning_rate": 5.974427779587004e-06, + "loss": 0.8262, + "step": 1048 + }, + { + "epoch": 6.44, + "learning_rate": 5.9562460006371295e-06, + "loss": 0.8818, + "step": 1049 + }, + { + "epoch": 6.44, + "learning_rate": 5.938080188586699e-06, + "loss": 0.7998, + "step": 1050 + }, + { + "epoch": 6.45, + "learning_rate": 5.919930415164033e-06, + "loss": 0.7217, + "step": 1051 + }, + { + "epoch": 6.45, + "learning_rate": 5.901796752034128e-06, + "loss": 0.8486, + "step": 1052 + }, + { + "epoch": 6.46, + "learning_rate": 5.883679270798363e-06, + "loss": 0.7949, + "step": 1053 + }, + { + "epoch": 6.47, + "learning_rate": 5.865578042994227e-06, + "loss": 0.9209, + "step": 1054 + }, + { + "epoch": 6.47, + "learning_rate": 5.84749314009503e-06, + "loss": 0.8779, + "step": 1055 + }, + { + "epoch": 6.48, + "learning_rate": 5.829424633509627e-06, + "loss": 0.9678, + "step": 1056 + }, + { + "epoch": 6.48, + "learning_rate": 5.8113725945821245e-06, + "loss": 0.7764, + "step": 1057 + }, + { + "epoch": 6.49, + "learning_rate": 5.7933370945916036e-06, + "loss": 0.8252, + "step": 1058 + }, + { + "epoch": 6.5, + "learning_rate": 5.775318204751854e-06, + "loss": 0.8438, + "step": 1059 + }, + { + "epoch": 6.5, + "learning_rate": 5.757315996211066e-06, + "loss": 0.7744, + "step": 1060 + }, + { + "epoch": 6.51, + "learning_rate": 5.7393305400515755e-06, + "loss": 0.8027, + "step": 1061 + }, + { + "epoch": 6.52, + "learning_rate": 5.721361907289556e-06, + "loss": 0.834, + "step": 1062 + }, + { + "epoch": 6.52, + "learning_rate": 5.703410168874768e-06, + "loss": 0.8496, + "step": 1063 + }, + { + "epoch": 6.53, + "learning_rate": 5.685475395690259e-06, + "loss": 1.0342, + "step": 1064 + }, + { + "epoch": 6.53, + "learning_rate": 5.667557658552078e-06, + "loss": 0.8789, + "step": 1065 + }, + { + "epoch": 6.54, + "learning_rate": 5.649657028209024e-06, + "loss": 0.7568, + "step": 1066 + }, + { + "epoch": 6.55, + "learning_rate": 5.631773575342343e-06, + "loss": 0.791, + "step": 1067 + }, + { + "epoch": 6.55, + "learning_rate": 5.61390737056545e-06, + "loss": 0.9238, + "step": 1068 + }, + { + "epoch": 6.56, + "learning_rate": 5.5960584844236565e-06, + "loss": 0.7002, + "step": 1069 + }, + { + "epoch": 6.56, + "learning_rate": 5.5782269873939e-06, + "loss": 0.8096, + "step": 1070 + }, + { + "epoch": 6.57, + "learning_rate": 5.560412949884442e-06, + "loss": 0.8545, + "step": 1071 + }, + { + "epoch": 6.58, + "learning_rate": 5.542616442234618e-06, + "loss": 0.8203, + "step": 1072 + }, + { + "epoch": 6.58, + "learning_rate": 5.52483753471454e-06, + "loss": 0.8271, + "step": 1073 + }, + { + "epoch": 6.59, + "learning_rate": 5.507076297524818e-06, + "loss": 0.8428, + "step": 1074 + }, + { + "epoch": 6.6, + "learning_rate": 5.48933280079631e-06, + "loss": 0.8076, + "step": 1075 + }, + { + "epoch": 6.6, + "learning_rate": 5.471607114589806e-06, + "loss": 0.8057, + "step": 1076 + }, + { + "epoch": 6.61, + "learning_rate": 5.453899308895774e-06, + "loss": 0.7715, + "step": 1077 + }, + { + "epoch": 6.61, + "learning_rate": 5.436209453634087e-06, + "loss": 0.7207, + "step": 1078 + }, + { + "epoch": 6.62, + "learning_rate": 5.418537618653743e-06, + "loss": 0.7812, + "step": 1079 + }, + { + "epoch": 6.63, + "learning_rate": 5.400883873732574e-06, + "loss": 0.8213, + "step": 1080 + }, + { + "epoch": 6.63, + "learning_rate": 5.3832482885769855e-06, + "loss": 0.7451, + "step": 1081 + }, + { + "epoch": 6.64, + "learning_rate": 5.365630932821688e-06, + "loss": 0.835, + "step": 1082 + }, + { + "epoch": 6.64, + "learning_rate": 5.3480318760294084e-06, + "loss": 0.8604, + "step": 1083 + }, + { + "epoch": 6.65, + "learning_rate": 5.330451187690614e-06, + "loss": 0.9072, + "step": 1084 + }, + { + "epoch": 6.66, + "learning_rate": 5.3128889372232436e-06, + "loss": 0.8721, + "step": 1085 + }, + { + "epoch": 6.66, + "learning_rate": 5.295345193972445e-06, + "loss": 0.8779, + "step": 1086 + }, + { + "epoch": 6.67, + "learning_rate": 5.277820027210279e-06, + "loss": 0.8916, + "step": 1087 + }, + { + "epoch": 6.67, + "learning_rate": 5.260313506135452e-06, + "loss": 0.8721, + "step": 1088 + }, + { + "epoch": 6.68, + "learning_rate": 5.242825699873068e-06, + "loss": 0.8613, + "step": 1089 + }, + { + "epoch": 6.69, + "learning_rate": 5.225356677474309e-06, + "loss": 0.8379, + "step": 1090 + }, + { + "epoch": 6.69, + "learning_rate": 5.2079065079162115e-06, + "loss": 0.708, + "step": 1091 + }, + { + "epoch": 6.7, + "learning_rate": 5.190475260101353e-06, + "loss": 0.873, + "step": 1092 + }, + { + "epoch": 6.71, + "learning_rate": 5.1730630028576055e-06, + "loss": 0.7119, + "step": 1093 + }, + { + "epoch": 6.71, + "learning_rate": 5.155669804937855e-06, + "loss": 0.8848, + "step": 1094 + }, + { + "epoch": 6.72, + "learning_rate": 5.138295735019741e-06, + "loss": 0.8633, + "step": 1095 + }, + { + "epoch": 6.72, + "learning_rate": 5.120940861705357e-06, + "loss": 0.8203, + "step": 1096 + }, + { + "epoch": 6.73, + "learning_rate": 5.103605253521007e-06, + "loss": 0.8398, + "step": 1097 + }, + { + "epoch": 6.74, + "learning_rate": 5.086288978916931e-06, + "loss": 0.9297, + "step": 1098 + }, + { + "epoch": 6.74, + "learning_rate": 5.068992106267021e-06, + "loss": 0.71, + "step": 1099 + }, + { + "epoch": 6.75, + "learning_rate": 5.051714703868569e-06, + "loss": 0.7275, + "step": 1100 + }, + { + "epoch": 6.75, + "learning_rate": 5.034456839941979e-06, + "loss": 0.8164, + "step": 1101 + }, + { + "epoch": 6.76, + "learning_rate": 5.017218582630507e-06, + "loss": 0.7363, + "step": 1102 + }, + { + "epoch": 6.77, + "learning_rate": 5.000000000000003e-06, + "loss": 0.9561, + "step": 1103 + }, + { + "epoch": 6.77, + "learning_rate": 4.982801160038614e-06, + "loss": 0.834, + "step": 1104 + }, + { + "epoch": 6.78, + "learning_rate": 4.965622130656551e-06, + "loss": 0.8418, + "step": 1105 + }, + { + "epoch": 6.79, + "learning_rate": 4.948462979685783e-06, + "loss": 0.8418, + "step": 1106 + }, + { + "epoch": 6.79, + "learning_rate": 4.931323774879807e-06, + "loss": 0.8584, + "step": 1107 + }, + { + "epoch": 6.8, + "learning_rate": 4.914204583913349e-06, + "loss": 0.8105, + "step": 1108 + }, + { + "epoch": 6.8, + "learning_rate": 4.897105474382109e-06, + "loss": 0.9131, + "step": 1109 + }, + { + "epoch": 6.81, + "learning_rate": 4.880026513802504e-06, + "loss": 0.791, + "step": 1110 + }, + { + "epoch": 6.82, + "learning_rate": 4.862967769611389e-06, + "loss": 0.8828, + "step": 1111 + }, + { + "epoch": 6.82, + "learning_rate": 4.845929309165793e-06, + "loss": 0.8291, + "step": 1112 + }, + { + "epoch": 6.83, + "learning_rate": 4.828911199742646e-06, + "loss": 0.8252, + "step": 1113 + }, + { + "epoch": 6.83, + "learning_rate": 4.8119135085385375e-06, + "loss": 0.7529, + "step": 1114 + }, + { + "epoch": 6.84, + "learning_rate": 4.794936302669417e-06, + "loss": 0.8613, + "step": 1115 + }, + { + "epoch": 6.85, + "learning_rate": 4.777979649170367e-06, + "loss": 0.7803, + "step": 1116 + }, + { + "epoch": 6.85, + "learning_rate": 4.7610436149953e-06, + "loss": 0.9141, + "step": 1117 + }, + { + "epoch": 6.86, + "learning_rate": 4.744128267016719e-06, + "loss": 0.8291, + "step": 1118 + }, + { + "epoch": 6.87, + "learning_rate": 4.727233672025453e-06, + "loss": 0.7451, + "step": 1119 + }, + { + "epoch": 6.87, + "learning_rate": 4.710359896730379e-06, + "loss": 0.8457, + "step": 1120 + }, + { + "epoch": 6.88, + "learning_rate": 4.693507007758165e-06, + "loss": 0.7646, + "step": 1121 + }, + { + "epoch": 6.88, + "learning_rate": 4.676675071653019e-06, + "loss": 0.8506, + "step": 1122 + }, + { + "epoch": 6.89, + "learning_rate": 4.659864154876411e-06, + "loss": 0.7246, + "step": 1123 + }, + { + "epoch": 6.9, + "learning_rate": 4.643074323806813e-06, + "loss": 0.8555, + "step": 1124 + }, + { + "epoch": 6.9, + "learning_rate": 4.626305644739435e-06, + "loss": 0.8125, + "step": 1125 + }, + { + "epoch": 6.91, + "learning_rate": 4.609558183885979e-06, + "loss": 0.8418, + "step": 1126 + }, + { + "epoch": 6.91, + "learning_rate": 4.592832007374364e-06, + "loss": 0.8271, + "step": 1127 + }, + { + "epoch": 6.92, + "learning_rate": 4.576127181248459e-06, + "loss": 0.7979, + "step": 1128 + }, + { + "epoch": 6.93, + "learning_rate": 4.559443771467833e-06, + "loss": 0.8438, + "step": 1129 + }, + { + "epoch": 6.93, + "learning_rate": 4.542781843907499e-06, + "loss": 0.7432, + "step": 1130 + }, + { + "epoch": 6.94, + "learning_rate": 4.5261414643576396e-06, + "loss": 0.7852, + "step": 1131 + }, + { + "epoch": 6.94, + "learning_rate": 4.509522698523352e-06, + "loss": 0.8125, + "step": 1132 + }, + { + "epoch": 6.95, + "learning_rate": 4.492925612024402e-06, + "loss": 0.7588, + "step": 1133 + }, + { + "epoch": 6.96, + "learning_rate": 4.476350270394942e-06, + "loss": 0.751, + "step": 1134 + }, + { + "epoch": 6.96, + "learning_rate": 4.4597967390832745e-06, + "loss": 0.9287, + "step": 1135 + }, + { + "epoch": 6.97, + "learning_rate": 4.4432650834515735e-06, + "loss": 0.7432, + "step": 1136 + }, + { + "epoch": 6.98, + "learning_rate": 4.426755368775637e-06, + "loss": 0.7783, + "step": 1137 + }, + { + "epoch": 6.98, + "learning_rate": 4.4102676602446375e-06, + "loss": 0.8613, + "step": 1138 + }, + { + "epoch": 6.99, + "learning_rate": 4.3938020229608506e-06, + "loss": 0.8584, + "step": 1139 + }, + { + "epoch": 6.99, + "learning_rate": 4.377358521939401e-06, + "loss": 0.8105, + "step": 1140 + }, + { + "epoch": 7.0, + "learning_rate": 4.360937222108002e-06, + "loss": 0.7871, + "step": 1141 + }, + { + "epoch": 7.01, + "learning_rate": 4.344538188306723e-06, + "loss": 0.5469, + "step": 1142 + }, + { + "epoch": 7.01, + "learning_rate": 4.328161485287693e-06, + "loss": 0.6025, + "step": 1143 + }, + { + "epoch": 7.02, + "learning_rate": 4.3118071777148865e-06, + "loss": 0.5752, + "step": 1144 + }, + { + "epoch": 7.02, + "learning_rate": 4.295475330163832e-06, + "loss": 0.6367, + "step": 1145 + }, + { + "epoch": 7.03, + "learning_rate": 4.279166007121389e-06, + "loss": 0.5527, + "step": 1146 + }, + { + "epoch": 7.04, + "learning_rate": 4.262879272985468e-06, + "loss": 0.5439, + "step": 1147 + }, + { + "epoch": 7.04, + "learning_rate": 4.246615192064787e-06, + "loss": 0.5586, + "step": 1148 + }, + { + "epoch": 7.05, + "learning_rate": 4.230373828578626e-06, + "loss": 0.6318, + "step": 1149 + }, + { + "epoch": 7.06, + "learning_rate": 4.21415524665655e-06, + "loss": 0.6299, + "step": 1150 + }, + { + "epoch": 7.06, + "learning_rate": 4.197959510338187e-06, + "loss": 0.583, + "step": 1151 + }, + { + "epoch": 7.07, + "learning_rate": 4.181786683572946e-06, + "loss": 0.626, + "step": 1152 + }, + { + "epoch": 7.07, + "learning_rate": 4.165636830219776e-06, + "loss": 0.5845, + "step": 1153 + }, + { + "epoch": 7.08, + "learning_rate": 4.149510014046922e-06, + "loss": 0.5723, + "step": 1154 + }, + { + "epoch": 7.09, + "learning_rate": 4.1334062987316695e-06, + "loss": 0.5391, + "step": 1155 + }, + { + "epoch": 7.09, + "learning_rate": 4.117325747860077e-06, + "loss": 0.5967, + "step": 1156 + }, + { + "epoch": 7.1, + "learning_rate": 4.101268424926741e-06, + "loss": 0.6357, + "step": 1157 + }, + { + "epoch": 7.1, + "learning_rate": 4.085234393334551e-06, + "loss": 0.5654, + "step": 1158 + }, + { + "epoch": 7.11, + "learning_rate": 4.069223716394419e-06, + "loss": 0.5889, + "step": 1159 + }, + { + "epoch": 7.12, + "learning_rate": 4.053236457325043e-06, + "loss": 0.5615, + "step": 1160 + }, + { + "epoch": 7.12, + "learning_rate": 4.0372726792526614e-06, + "loss": 0.5459, + "step": 1161 + }, + { + "epoch": 7.13, + "learning_rate": 4.021332445210785e-06, + "loss": 0.6182, + "step": 1162 + }, + { + "epoch": 7.13, + "learning_rate": 4.005415818139975e-06, + "loss": 0.6357, + "step": 1163 + }, + { + "epoch": 7.14, + "learning_rate": 3.989522860887567e-06, + "loss": 0.5, + "step": 1164 + }, + { + "epoch": 7.15, + "learning_rate": 3.973653636207437e-06, + "loss": 0.5625, + "step": 1165 + }, + { + "epoch": 7.15, + "learning_rate": 3.95780820675976e-06, + "loss": 0.6074, + "step": 1166 + }, + { + "epoch": 7.16, + "learning_rate": 3.941986635110754e-06, + "loss": 0.6416, + "step": 1167 + }, + { + "epoch": 7.17, + "learning_rate": 3.9261889837324245e-06, + "loss": 0.5239, + "step": 1168 + }, + { + "epoch": 7.17, + "learning_rate": 3.910415315002328e-06, + "loss": 0.5127, + "step": 1169 + }, + { + "epoch": 7.18, + "learning_rate": 3.89466569120334e-06, + "loss": 0.5771, + "step": 1170 + }, + { + "epoch": 7.18, + "learning_rate": 3.878940174523371e-06, + "loss": 0.6367, + "step": 1171 + }, + { + "epoch": 7.19, + "learning_rate": 3.8632388270551665e-06, + "loss": 0.6191, + "step": 1172 + }, + { + "epoch": 7.2, + "learning_rate": 3.847561710796019e-06, + "loss": 0.5928, + "step": 1173 + }, + { + "epoch": 7.2, + "learning_rate": 3.8319088876475595e-06, + "loss": 0.5742, + "step": 1174 + }, + { + "epoch": 7.21, + "learning_rate": 3.816280419415487e-06, + "loss": 0.6201, + "step": 1175 + }, + { + "epoch": 7.21, + "learning_rate": 3.8006763678093326e-06, + "loss": 0.6885, + "step": 1176 + }, + { + "epoch": 7.22, + "learning_rate": 3.785096794442229e-06, + "loss": 0.5742, + "step": 1177 + }, + { + "epoch": 7.23, + "learning_rate": 3.7695417608306415e-06, + "loss": 0.5352, + "step": 1178 + }, + { + "epoch": 7.23, + "learning_rate": 3.7540113283941536e-06, + "loss": 0.6123, + "step": 1179 + }, + { + "epoch": 7.24, + "learning_rate": 3.7385055584552e-06, + "loss": 0.5605, + "step": 1180 + }, + { + "epoch": 7.25, + "learning_rate": 3.723024512238833e-06, + "loss": 0.541, + "step": 1181 + }, + { + "epoch": 7.25, + "learning_rate": 3.707568250872493e-06, + "loss": 0.6328, + "step": 1182 + }, + { + "epoch": 7.26, + "learning_rate": 3.6921368353857524e-06, + "loss": 0.5498, + "step": 1183 + }, + { + "epoch": 7.26, + "learning_rate": 3.676730326710074e-06, + "loss": 0.5938, + "step": 1184 + }, + { + "epoch": 7.27, + "learning_rate": 3.6613487856785744e-06, + "loss": 0.5742, + "step": 1185 + }, + { + "epoch": 7.28, + "learning_rate": 3.645992273025797e-06, + "loss": 0.5493, + "step": 1186 + }, + { + "epoch": 7.28, + "learning_rate": 3.630660849387444e-06, + "loss": 0.5947, + "step": 1187 + }, + { + "epoch": 7.29, + "learning_rate": 3.6153545753001663e-06, + "loss": 0.5522, + "step": 1188 + }, + { + "epoch": 7.29, + "learning_rate": 3.6000735112012984e-06, + "loss": 0.5967, + "step": 1189 + }, + { + "epoch": 7.3, + "learning_rate": 3.584817717428647e-06, + "loss": 0.6006, + "step": 1190 + }, + { + "epoch": 7.31, + "learning_rate": 3.569587254220225e-06, + "loss": 0.5664, + "step": 1191 + }, + { + "epoch": 7.31, + "learning_rate": 3.5543821817140313e-06, + "loss": 0.5898, + "step": 1192 + }, + { + "epoch": 7.32, + "learning_rate": 3.5392025599478053e-06, + "loss": 0.4985, + "step": 1193 + }, + { + "epoch": 7.33, + "learning_rate": 3.5240484488588012e-06, + "loss": 0.5273, + "step": 1194 + }, + { + "epoch": 7.33, + "learning_rate": 3.5089199082835436e-06, + "loss": 0.627, + "step": 1195 + }, + { + "epoch": 7.34, + "learning_rate": 3.493816997957582e-06, + "loss": 0.5479, + "step": 1196 + }, + { + "epoch": 7.34, + "learning_rate": 3.478739777515264e-06, + "loss": 0.5625, + "step": 1197 + }, + { + "epoch": 7.35, + "learning_rate": 3.463688306489511e-06, + "loss": 0.5649, + "step": 1198 + }, + { + "epoch": 7.36, + "learning_rate": 3.448662644311567e-06, + "loss": 0.6064, + "step": 1199 + }, + { + "epoch": 7.36, + "learning_rate": 3.433662850310763e-06, + "loss": 0.6211, + "step": 1200 + }, + { + "epoch": 7.37, + "learning_rate": 3.418688983714291e-06, + "loss": 0.5337, + "step": 1201 + }, + { + "epoch": 7.37, + "learning_rate": 3.403741103646977e-06, + "loss": 0.6035, + "step": 1202 + }, + { + "epoch": 7.38, + "learning_rate": 3.3888192691310262e-06, + "loss": 0.5508, + "step": 1203 + }, + { + "epoch": 7.39, + "learning_rate": 3.373923539085805e-06, + "loss": 0.5215, + "step": 1204 + }, + { + "epoch": 7.39, + "learning_rate": 3.3590539723276083e-06, + "loss": 0.5239, + "step": 1205 + }, + { + "epoch": 7.4, + "learning_rate": 3.3442106275694295e-06, + "loss": 0.5444, + "step": 1206 + }, + { + "epoch": 7.4, + "learning_rate": 3.329393563420713e-06, + "loss": 0.6401, + "step": 1207 + }, + { + "epoch": 7.41, + "learning_rate": 3.3146028383871363e-06, + "loss": 0.5825, + "step": 1208 + }, + { + "epoch": 7.42, + "learning_rate": 3.2998385108703766e-06, + "loss": 0.5347, + "step": 1209 + }, + { + "epoch": 7.42, + "learning_rate": 3.285100639167883e-06, + "loss": 0.5645, + "step": 1210 + }, + { + "epoch": 7.43, + "learning_rate": 3.2703892814726436e-06, + "loss": 0.5459, + "step": 1211 + }, + { + "epoch": 7.44, + "learning_rate": 3.2557044958729466e-06, + "loss": 0.582, + "step": 1212 + }, + { + "epoch": 7.44, + "learning_rate": 3.2410463403521653e-06, + "loss": 0.6035, + "step": 1213 + }, + { + "epoch": 7.45, + "learning_rate": 3.2264148727885257e-06, + "loss": 0.6094, + "step": 1214 + }, + { + "epoch": 7.45, + "learning_rate": 3.211810150954867e-06, + "loss": 0.5801, + "step": 1215 + }, + { + "epoch": 7.46, + "learning_rate": 3.1972322325184347e-06, + "loss": 0.6016, + "step": 1216 + }, + { + "epoch": 7.47, + "learning_rate": 3.182681175040625e-06, + "loss": 0.5352, + "step": 1217 + }, + { + "epoch": 7.47, + "learning_rate": 3.1681570359767875e-06, + "loss": 0.5757, + "step": 1218 + }, + { + "epoch": 7.48, + "learning_rate": 3.1536598726759747e-06, + "loss": 0.5894, + "step": 1219 + }, + { + "epoch": 7.48, + "learning_rate": 3.1391897423807204e-06, + "loss": 0.4736, + "step": 1220 + }, + { + "epoch": 7.49, + "learning_rate": 3.1247467022268284e-06, + "loss": 0.4985, + "step": 1221 + }, + { + "epoch": 7.5, + "learning_rate": 3.110330809243134e-06, + "loss": 0.5459, + "step": 1222 + }, + { + "epoch": 7.5, + "learning_rate": 3.095942120351276e-06, + "loss": 0.4756, + "step": 1223 + }, + { + "epoch": 7.51, + "learning_rate": 3.081580692365478e-06, + "loss": 0.5908, + "step": 1224 + }, + { + "epoch": 7.52, + "learning_rate": 3.0672465819923215e-06, + "loss": 0.583, + "step": 1225 + }, + { + "epoch": 7.52, + "learning_rate": 3.052939845830528e-06, + "loss": 0.5034, + "step": 1226 + }, + { + "epoch": 7.53, + "learning_rate": 3.0386605403707347e-06, + "loss": 0.4697, + "step": 1227 + }, + { + "epoch": 7.53, + "learning_rate": 3.0244087219952565e-06, + "loss": 0.5146, + "step": 1228 + }, + { + "epoch": 7.54, + "learning_rate": 3.0101844469778797e-06, + "loss": 0.5674, + "step": 1229 + }, + { + "epoch": 7.55, + "learning_rate": 2.9959877714836406e-06, + "loss": 0.542, + "step": 1230 + }, + { + "epoch": 7.55, + "learning_rate": 2.981818751568586e-06, + "loss": 0.5669, + "step": 1231 + }, + { + "epoch": 7.56, + "learning_rate": 2.9676774431795752e-06, + "loss": 0.5244, + "step": 1232 + }, + { + "epoch": 7.56, + "learning_rate": 2.95356390215404e-06, + "loss": 0.5679, + "step": 1233 + }, + { + "epoch": 7.57, + "learning_rate": 2.939478184219777e-06, + "loss": 0.4868, + "step": 1234 + }, + { + "epoch": 7.58, + "learning_rate": 2.9254203449947196e-06, + "loss": 0.5498, + "step": 1235 + }, + { + "epoch": 7.58, + "learning_rate": 2.9113904399867188e-06, + "loss": 0.6143, + "step": 1236 + }, + { + "epoch": 7.59, + "learning_rate": 2.8973885245933287e-06, + "loss": 0.6279, + "step": 1237 + }, + { + "epoch": 7.6, + "learning_rate": 2.8834146541015874e-06, + "loss": 0.5552, + "step": 1238 + }, + { + "epoch": 7.6, + "learning_rate": 2.869468883687798e-06, + "loss": 0.5186, + "step": 1239 + }, + { + "epoch": 7.61, + "learning_rate": 2.855551268417305e-06, + "loss": 0.5244, + "step": 1240 + }, + { + "epoch": 7.61, + "learning_rate": 2.8416618632442785e-06, + "loss": 0.5884, + "step": 1241 + }, + { + "epoch": 7.62, + "learning_rate": 2.827800723011508e-06, + "loss": 0.6289, + "step": 1242 + }, + { + "epoch": 7.63, + "learning_rate": 2.813967902450179e-06, + "loss": 0.5732, + "step": 1243 + }, + { + "epoch": 7.63, + "learning_rate": 2.8001634561796463e-06, + "loss": 0.5527, + "step": 1244 + }, + { + "epoch": 7.64, + "learning_rate": 2.786387438707231e-06, + "loss": 0.5835, + "step": 1245 + }, + { + "epoch": 7.64, + "learning_rate": 2.7726399044280107e-06, + "loss": 0.5557, + "step": 1246 + }, + { + "epoch": 7.65, + "learning_rate": 2.758920907624585e-06, + "loss": 0.5322, + "step": 1247 + }, + { + "epoch": 7.66, + "learning_rate": 2.7452305024668747e-06, + "loss": 0.54, + "step": 1248 + }, + { + "epoch": 7.66, + "learning_rate": 2.7315687430119097e-06, + "loss": 0.6719, + "step": 1249 + }, + { + "epoch": 7.67, + "learning_rate": 2.7179356832036142e-06, + "loss": 0.6846, + "step": 1250 + }, + { + "epoch": 7.67, + "learning_rate": 2.704331376872581e-06, + "loss": 0.5723, + "step": 1251 + }, + { + "epoch": 7.68, + "learning_rate": 2.6907558777358756e-06, + "loss": 0.5562, + "step": 1252 + }, + { + "epoch": 7.69, + "learning_rate": 2.677209239396811e-06, + "loss": 0.5967, + "step": 1253 + }, + { + "epoch": 7.69, + "learning_rate": 2.6636915153447494e-06, + "loss": 0.4829, + "step": 1254 + }, + { + "epoch": 7.7, + "learning_rate": 2.650202758954886e-06, + "loss": 0.6201, + "step": 1255 + }, + { + "epoch": 7.71, + "learning_rate": 2.6367430234880286e-06, + "loss": 0.4766, + "step": 1256 + }, + { + "epoch": 7.71, + "learning_rate": 2.6233123620903946e-06, + "loss": 0.583, + "step": 1257 + }, + { + "epoch": 7.72, + "learning_rate": 2.6099108277934105e-06, + "loss": 0.5054, + "step": 1258 + }, + { + "epoch": 7.72, + "learning_rate": 2.5965384735134825e-06, + "loss": 0.5459, + "step": 1259 + }, + { + "epoch": 7.73, + "learning_rate": 2.583195352051808e-06, + "loss": 0.5312, + "step": 1260 + }, + { + "epoch": 7.74, + "learning_rate": 2.5698815160941494e-06, + "loss": 0.584, + "step": 1261 + }, + { + "epoch": 7.74, + "learning_rate": 2.5565970182106425e-06, + "loss": 0.5928, + "step": 1262 + }, + { + "epoch": 7.75, + "learning_rate": 2.5433419108555758e-06, + "loss": 0.5205, + "step": 1263 + }, + { + "epoch": 7.75, + "learning_rate": 2.5301162463671845e-06, + "loss": 0.5303, + "step": 1264 + }, + { + "epoch": 7.76, + "learning_rate": 2.516920076967455e-06, + "loss": 0.5615, + "step": 1265 + }, + { + "epoch": 7.77, + "learning_rate": 2.5037534547619125e-06, + "loss": 0.6182, + "step": 1266 + }, + { + "epoch": 7.77, + "learning_rate": 2.4906164317394067e-06, + "loss": 0.5088, + "step": 1267 + }, + { + "epoch": 7.78, + "learning_rate": 2.4775090597719163e-06, + "loss": 0.5264, + "step": 1268 + }, + { + "epoch": 7.79, + "learning_rate": 2.4644313906143414e-06, + "loss": 0.5195, + "step": 1269 + }, + { + "epoch": 7.79, + "learning_rate": 2.451383475904304e-06, + "loss": 0.5332, + "step": 1270 + }, + { + "epoch": 7.8, + "learning_rate": 2.438365367161939e-06, + "loss": 0.5718, + "step": 1271 + }, + { + "epoch": 7.8, + "learning_rate": 2.4253771157896856e-06, + "loss": 0.5269, + "step": 1272 + }, + { + "epoch": 7.81, + "learning_rate": 2.4124187730720916e-06, + "loss": 0.563, + "step": 1273 + }, + { + "epoch": 7.82, + "learning_rate": 2.3994903901756163e-06, + "loss": 0.5156, + "step": 1274 + }, + { + "epoch": 7.82, + "learning_rate": 2.3865920181484127e-06, + "loss": 0.478, + "step": 1275 + }, + { + "epoch": 7.83, + "learning_rate": 2.3737237079201437e-06, + "loss": 0.5879, + "step": 1276 + }, + { + "epoch": 7.83, + "learning_rate": 2.3608855103017613e-06, + "loss": 0.5972, + "step": 1277 + }, + { + "epoch": 7.84, + "learning_rate": 2.3480774759853307e-06, + "loss": 0.5254, + "step": 1278 + }, + { + "epoch": 7.85, + "learning_rate": 2.3352996555438036e-06, + "loss": 0.5645, + "step": 1279 + }, + { + "epoch": 7.85, + "learning_rate": 2.3225520994308382e-06, + "loss": 0.5957, + "step": 1280 + }, + { + "epoch": 7.86, + "learning_rate": 2.309834857980583e-06, + "loss": 0.5371, + "step": 1281 + }, + { + "epoch": 7.87, + "learning_rate": 2.297147981407509e-06, + "loss": 0.5508, + "step": 1282 + }, + { + "epoch": 7.87, + "learning_rate": 2.2844915198061714e-06, + "loss": 0.4985, + "step": 1283 + }, + { + "epoch": 7.88, + "learning_rate": 2.2718655231510368e-06, + "loss": 0.5928, + "step": 1284 + }, + { + "epoch": 7.88, + "learning_rate": 2.2592700412962775e-06, + "loss": 0.5928, + "step": 1285 + }, + { + "epoch": 7.89, + "learning_rate": 2.246705123975582e-06, + "loss": 0.6377, + "step": 1286 + }, + { + "epoch": 7.9, + "learning_rate": 2.234170820801954e-06, + "loss": 0.5674, + "step": 1287 + }, + { + "epoch": 7.9, + "learning_rate": 2.2216671812675118e-06, + "loss": 0.4785, + "step": 1288 + }, + { + "epoch": 7.91, + "learning_rate": 2.209194254743295e-06, + "loss": 0.5767, + "step": 1289 + }, + { + "epoch": 7.91, + "learning_rate": 2.196752090479083e-06, + "loss": 0.5601, + "step": 1290 + }, + { + "epoch": 7.92, + "learning_rate": 2.184340737603178e-06, + "loss": 0.4595, + "step": 1291 + }, + { + "epoch": 7.93, + "learning_rate": 2.1719602451222245e-06, + "loss": 0.5625, + "step": 1292 + }, + { + "epoch": 7.93, + "learning_rate": 2.159610661921018e-06, + "loss": 0.5679, + "step": 1293 + }, + { + "epoch": 7.94, + "learning_rate": 2.1472920367623094e-06, + "loss": 0.6499, + "step": 1294 + }, + { + "epoch": 7.94, + "learning_rate": 2.1350044182866025e-06, + "loss": 0.4966, + "step": 1295 + }, + { + "epoch": 7.95, + "learning_rate": 2.1227478550119763e-06, + "loss": 0.5933, + "step": 1296 + }, + { + "epoch": 7.96, + "learning_rate": 2.1105223953338805e-06, + "loss": 0.4814, + "step": 1297 + }, + { + "epoch": 7.96, + "learning_rate": 2.09832808752496e-06, + "loss": 0.5088, + "step": 1298 + }, + { + "epoch": 7.97, + "learning_rate": 2.086164979734856e-06, + "loss": 0.5586, + "step": 1299 + }, + { + "epoch": 7.98, + "learning_rate": 2.0740331199900053e-06, + "loss": 0.5396, + "step": 1300 + }, + { + "epoch": 7.98, + "learning_rate": 2.0619325561934658e-06, + "loss": 0.6182, + "step": 1301 + }, + { + "epoch": 7.99, + "learning_rate": 2.0498633361247278e-06, + "loss": 0.5537, + "step": 1302 + }, + { + "epoch": 7.99, + "learning_rate": 2.0378255074395094e-06, + "loss": 0.5107, + "step": 1303 + }, + { + "epoch": 8.0, + "learning_rate": 2.0258191176695896e-06, + "loss": 0.5176, + "step": 1304 + }, + { + "epoch": 8.01, + "learning_rate": 2.0138442142226e-06, + "loss": 0.4658, + "step": 1305 + }, + { + "epoch": 8.01, + "learning_rate": 2.001900844381857e-06, + "loss": 0.3608, + "step": 1306 + }, + { + "epoch": 8.02, + "learning_rate": 1.9899890553061565e-06, + "loss": 0.4785, + "step": 1307 + }, + { + "epoch": 8.02, + "learning_rate": 1.978108894029598e-06, + "loss": 0.4692, + "step": 1308 + }, + { + "epoch": 8.03, + "learning_rate": 1.9662604074614044e-06, + "loss": 0.4463, + "step": 1309 + }, + { + "epoch": 8.04, + "learning_rate": 1.954443642385727e-06, + "loss": 0.4473, + "step": 1310 + }, + { + "epoch": 8.04, + "learning_rate": 1.9426586454614617e-06, + "loss": 0.3853, + "step": 1311 + }, + { + "epoch": 8.05, + "learning_rate": 1.9309054632220645e-06, + "loss": 0.4043, + "step": 1312 + }, + { + "epoch": 8.06, + "learning_rate": 1.919184142075372e-06, + "loss": 0.3589, + "step": 1313 + }, + { + "epoch": 8.06, + "learning_rate": 1.9074947283034206e-06, + "loss": 0.3608, + "step": 1314 + }, + { + "epoch": 8.07, + "learning_rate": 1.895837268062256e-06, + "loss": 0.499, + "step": 1315 + }, + { + "epoch": 8.07, + "learning_rate": 1.884211807381755e-06, + "loss": 0.4058, + "step": 1316 + }, + { + "epoch": 8.08, + "learning_rate": 1.8726183921654373e-06, + "loss": 0.5142, + "step": 1317 + }, + { + "epoch": 8.09, + "learning_rate": 1.8610570681903018e-06, + "loss": 0.3506, + "step": 1318 + }, + { + "epoch": 8.09, + "learning_rate": 1.8495278811066197e-06, + "loss": 0.4849, + "step": 1319 + }, + { + "epoch": 8.1, + "learning_rate": 1.8380308764377841e-06, + "loss": 0.3979, + "step": 1320 + }, + { + "epoch": 8.1, + "learning_rate": 1.8265660995801004e-06, + "loss": 0.375, + "step": 1321 + }, + { + "epoch": 8.11, + "learning_rate": 1.8151335958026317e-06, + "loss": 0.4575, + "step": 1322 + }, + { + "epoch": 8.12, + "learning_rate": 1.803733410247006e-06, + "loss": 0.3691, + "step": 1323 + }, + { + "epoch": 8.12, + "learning_rate": 1.7923655879272395e-06, + "loss": 0.4448, + "step": 1324 + }, + { + "epoch": 8.13, + "learning_rate": 1.7810301737295588e-06, + "loss": 0.4111, + "step": 1325 + }, + { + "epoch": 8.13, + "learning_rate": 1.76972721241224e-06, + "loss": 0.3872, + "step": 1326 + }, + { + "epoch": 8.14, + "learning_rate": 1.7584567486054039e-06, + "loss": 0.4336, + "step": 1327 + }, + { + "epoch": 8.15, + "learning_rate": 1.7472188268108569e-06, + "loss": 0.3569, + "step": 1328 + }, + { + "epoch": 8.15, + "learning_rate": 1.7360134914019122e-06, + "loss": 0.4526, + "step": 1329 + }, + { + "epoch": 8.16, + "learning_rate": 1.7248407866232175e-06, + "loss": 0.4351, + "step": 1330 + }, + { + "epoch": 8.17, + "learning_rate": 1.7137007565905772e-06, + "loss": 0.3394, + "step": 1331 + }, + { + "epoch": 8.17, + "learning_rate": 1.7025934452907755e-06, + "loss": 0.439, + "step": 1332 + }, + { + "epoch": 8.18, + "learning_rate": 1.6915188965814034e-06, + "loss": 0.437, + "step": 1333 + }, + { + "epoch": 8.18, + "learning_rate": 1.6804771541906972e-06, + "loss": 0.3999, + "step": 1334 + }, + { + "epoch": 8.19, + "learning_rate": 1.6694682617173452e-06, + "loss": 0.3999, + "step": 1335 + }, + { + "epoch": 8.2, + "learning_rate": 1.6584922626303325e-06, + "loss": 0.4165, + "step": 1336 + }, + { + "epoch": 8.2, + "learning_rate": 1.6475492002687632e-06, + "loss": 0.4141, + "step": 1337 + }, + { + "epoch": 8.21, + "learning_rate": 1.6366391178416918e-06, + "loss": 0.397, + "step": 1338 + }, + { + "epoch": 8.21, + "learning_rate": 1.6257620584279454e-06, + "loss": 0.3926, + "step": 1339 + }, + { + "epoch": 8.22, + "learning_rate": 1.6149180649759622e-06, + "loss": 0.3926, + "step": 1340 + }, + { + "epoch": 8.23, + "learning_rate": 1.60410718030361e-06, + "loss": 0.436, + "step": 1341 + }, + { + "epoch": 8.23, + "learning_rate": 1.5933294470980443e-06, + "loss": 0.4141, + "step": 1342 + }, + { + "epoch": 8.24, + "learning_rate": 1.5825849079155032e-06, + "loss": 0.4165, + "step": 1343 + }, + { + "epoch": 8.25, + "learning_rate": 1.5718736051811634e-06, + "loss": 0.4912, + "step": 1344 + }, + { + "epoch": 8.25, + "learning_rate": 1.5611955811889645e-06, + "loss": 0.397, + "step": 1345 + }, + { + "epoch": 8.26, + "learning_rate": 1.5505508781014489e-06, + "loss": 0.4297, + "step": 1346 + }, + { + "epoch": 8.26, + "learning_rate": 1.539939537949583e-06, + "loss": 0.4883, + "step": 1347 + }, + { + "epoch": 8.27, + "learning_rate": 1.5293616026326053e-06, + "loss": 0.3496, + "step": 1348 + }, + { + "epoch": 8.28, + "learning_rate": 1.5188171139178486e-06, + "loss": 0.4014, + "step": 1349 + }, + { + "epoch": 8.28, + "learning_rate": 1.5083061134405874e-06, + "loss": 0.3706, + "step": 1350 + }, + { + "epoch": 8.29, + "learning_rate": 1.4978286427038602e-06, + "loss": 0.4463, + "step": 1351 + }, + { + "epoch": 8.29, + "learning_rate": 1.4873847430783118e-06, + "loss": 0.4316, + "step": 1352 + }, + { + "epoch": 8.3, + "learning_rate": 1.476974455802036e-06, + "loss": 0.4258, + "step": 1353 + }, + { + "epoch": 8.31, + "learning_rate": 1.4665978219804056e-06, + "loss": 0.3833, + "step": 1354 + }, + { + "epoch": 8.31, + "learning_rate": 1.4562548825859092e-06, + "loss": 0.3687, + "step": 1355 + }, + { + "epoch": 8.32, + "learning_rate": 1.4459456784579917e-06, + "loss": 0.4141, + "step": 1356 + }, + { + "epoch": 8.33, + "learning_rate": 1.435670250302892e-06, + "loss": 0.4692, + "step": 1357 + }, + { + "epoch": 8.33, + "learning_rate": 1.425428638693489e-06, + "loss": 0.3999, + "step": 1358 + }, + { + "epoch": 8.34, + "learning_rate": 1.415220884069135e-06, + "loss": 0.4443, + "step": 1359 + }, + { + "epoch": 8.34, + "learning_rate": 1.405047026735491e-06, + "loss": 0.3403, + "step": 1360 + }, + { + "epoch": 8.35, + "learning_rate": 1.394907106864375e-06, + "loss": 0.4438, + "step": 1361 + }, + { + "epoch": 8.36, + "learning_rate": 1.3848011644936077e-06, + "loss": 0.3643, + "step": 1362 + }, + { + "epoch": 8.36, + "learning_rate": 1.3747292395268407e-06, + "loss": 0.4121, + "step": 1363 + }, + { + "epoch": 8.37, + "learning_rate": 1.3646913717334142e-06, + "loss": 0.394, + "step": 1364 + }, + { + "epoch": 8.37, + "learning_rate": 1.3546876007481847e-06, + "loss": 0.4102, + "step": 1365 + }, + { + "epoch": 8.38, + "learning_rate": 1.344717966071385e-06, + "loss": 0.3857, + "step": 1366 + }, + { + "epoch": 8.39, + "learning_rate": 1.3347825070684518e-06, + "loss": 0.3726, + "step": 1367 + }, + { + "epoch": 8.39, + "learning_rate": 1.3248812629698815e-06, + "loss": 0.4077, + "step": 1368 + }, + { + "epoch": 8.4, + "learning_rate": 1.3150142728710669e-06, + "loss": 0.4009, + "step": 1369 + }, + { + "epoch": 8.4, + "learning_rate": 1.3051815757321607e-06, + "loss": 0.3789, + "step": 1370 + }, + { + "epoch": 8.41, + "learning_rate": 1.295383210377895e-06, + "loss": 0.3452, + "step": 1371 + }, + { + "epoch": 8.42, + "learning_rate": 1.2856192154974488e-06, + "loss": 0.4043, + "step": 1372 + }, + { + "epoch": 8.42, + "learning_rate": 1.2758896296442834e-06, + "loss": 0.4385, + "step": 1373 + }, + { + "epoch": 8.43, + "learning_rate": 1.266194491235998e-06, + "loss": 0.4263, + "step": 1374 + }, + { + "epoch": 8.44, + "learning_rate": 1.2565338385541792e-06, + "loss": 0.416, + "step": 1375 + }, + { + "epoch": 8.44, + "learning_rate": 1.2469077097442372e-06, + "loss": 0.4087, + "step": 1376 + }, + { + "epoch": 8.45, + "learning_rate": 1.2373161428152647e-06, + "loss": 0.4033, + "step": 1377 + }, + { + "epoch": 8.45, + "learning_rate": 1.2277591756398933e-06, + "loss": 0.3394, + "step": 1378 + }, + { + "epoch": 8.46, + "learning_rate": 1.2182368459541294e-06, + "loss": 0.4214, + "step": 1379 + }, + { + "epoch": 8.47, + "learning_rate": 1.2087491913572103e-06, + "loss": 0.4229, + "step": 1380 + }, + { + "epoch": 8.47, + "learning_rate": 1.1992962493114645e-06, + "loss": 0.3779, + "step": 1381 + }, + { + "epoch": 8.48, + "learning_rate": 1.1898780571421554e-06, + "loss": 0.4639, + "step": 1382 + }, + { + "epoch": 8.48, + "learning_rate": 1.1804946520373307e-06, + "loss": 0.4116, + "step": 1383 + }, + { + "epoch": 8.49, + "learning_rate": 1.171146071047683e-06, + "loss": 0.3823, + "step": 1384 + }, + { + "epoch": 8.5, + "learning_rate": 1.161832351086396e-06, + "loss": 0.4209, + "step": 1385 + }, + { + "epoch": 8.5, + "learning_rate": 1.1525535289290168e-06, + "loss": 0.3936, + "step": 1386 + }, + { + "epoch": 8.51, + "learning_rate": 1.1433096412132838e-06, + "loss": 0.3999, + "step": 1387 + }, + { + "epoch": 8.52, + "learning_rate": 1.1341007244390023e-06, + "loss": 0.437, + "step": 1388 + }, + { + "epoch": 8.52, + "learning_rate": 1.124926814967887e-06, + "loss": 0.3521, + "step": 1389 + }, + { + "epoch": 8.53, + "learning_rate": 1.1157879490234346e-06, + "loss": 0.4141, + "step": 1390 + }, + { + "epoch": 8.53, + "learning_rate": 1.1066841626907633e-06, + "loss": 0.418, + "step": 1391 + }, + { + "epoch": 8.54, + "learning_rate": 1.097615491916485e-06, + "loss": 0.4189, + "step": 1392 + }, + { + "epoch": 8.55, + "learning_rate": 1.088581972508549e-06, + "loss": 0.4517, + "step": 1393 + }, + { + "epoch": 8.55, + "learning_rate": 1.0795836401361148e-06, + "loss": 0.4067, + "step": 1394 + }, + { + "epoch": 8.56, + "learning_rate": 1.0706205303294025e-06, + "loss": 0.375, + "step": 1395 + }, + { + "epoch": 8.56, + "learning_rate": 1.0616926784795511e-06, + "loss": 0.3359, + "step": 1396 + }, + { + "epoch": 8.57, + "learning_rate": 1.0528001198384862e-06, + "loss": 0.4092, + "step": 1397 + }, + { + "epoch": 8.58, + "learning_rate": 1.043942889518782e-06, + "loss": 0.3726, + "step": 1398 + }, + { + "epoch": 8.58, + "learning_rate": 1.035121022493506e-06, + "loss": 0.4136, + "step": 1399 + }, + { + "epoch": 8.59, + "learning_rate": 1.026334553596101e-06, + "loss": 0.3877, + "step": 1400 + }, + { + "epoch": 8.6, + "learning_rate": 1.0175835175202341e-06, + "loss": 0.4268, + "step": 1401 + }, + { + "epoch": 8.6, + "learning_rate": 1.0088679488196695e-06, + "loss": 0.4053, + "step": 1402 + }, + { + "epoch": 8.61, + "learning_rate": 1.0001878819081268e-06, + "loss": 0.3955, + "step": 1403 + }, + { + "epoch": 8.61, + "learning_rate": 9.91543351059141e-07, + "loss": 0.3677, + "step": 1404 + }, + { + "epoch": 8.62, + "learning_rate": 9.829343904059342e-07, + "loss": 0.3691, + "step": 1405 + }, + { + "epoch": 8.63, + "learning_rate": 9.743610339412801e-07, + "loss": 0.4097, + "step": 1406 + }, + { + "epoch": 8.63, + "learning_rate": 9.658233155173657e-07, + "loss": 0.4043, + "step": 1407 + }, + { + "epoch": 8.64, + "learning_rate": 9.573212688456635e-07, + "loss": 0.4346, + "step": 1408 + }, + { + "epoch": 8.64, + "learning_rate": 9.488549274967873e-07, + "loss": 0.3755, + "step": 1409 + }, + { + "epoch": 8.65, + "learning_rate": 9.404243249003786e-07, + "loss": 0.373, + "step": 1410 + }, + { + "epoch": 8.66, + "learning_rate": 9.320294943449537e-07, + "loss": 0.4517, + "step": 1411 + }, + { + "epoch": 8.66, + "learning_rate": 9.236704689777842e-07, + "loss": 0.4087, + "step": 1412 + }, + { + "epoch": 8.67, + "learning_rate": 9.153472818047627e-07, + "loss": 0.4146, + "step": 1413 + }, + { + "epoch": 8.67, + "learning_rate": 9.070599656902801e-07, + "loss": 0.3848, + "step": 1414 + }, + { + "epoch": 8.68, + "learning_rate": 8.988085533570833e-07, + "loss": 0.3652, + "step": 1415 + }, + { + "epoch": 8.69, + "learning_rate": 8.905930773861527e-07, + "loss": 0.3765, + "step": 1416 + }, + { + "epoch": 8.69, + "learning_rate": 8.824135702165693e-07, + "loss": 0.395, + "step": 1417 + }, + { + "epoch": 8.7, + "learning_rate": 8.74270064145396e-07, + "loss": 0.3818, + "step": 1418 + }, + { + "epoch": 8.71, + "learning_rate": 8.661625913275463e-07, + "loss": 0.375, + "step": 1419 + }, + { + "epoch": 8.71, + "learning_rate": 8.580911837756467e-07, + "loss": 0.3896, + "step": 1420 + }, + { + "epoch": 8.72, + "learning_rate": 8.500558733599206e-07, + "loss": 0.3535, + "step": 1421 + }, + { + "epoch": 8.72, + "learning_rate": 8.420566918080686e-07, + "loss": 0.4189, + "step": 1422 + }, + { + "epoch": 8.73, + "learning_rate": 8.340936707051273e-07, + "loss": 0.4199, + "step": 1423 + }, + { + "epoch": 8.74, + "learning_rate": 8.261668414933521e-07, + "loss": 0.4771, + "step": 1424 + }, + { + "epoch": 8.74, + "learning_rate": 8.182762354720985e-07, + "loss": 0.3779, + "step": 1425 + }, + { + "epoch": 8.75, + "learning_rate": 8.10421883797694e-07, + "loss": 0.3979, + "step": 1426 + }, + { + "epoch": 8.75, + "learning_rate": 8.026038174833085e-07, + "loss": 0.4072, + "step": 1427 + }, + { + "epoch": 8.76, + "learning_rate": 7.948220673988427e-07, + "loss": 0.4141, + "step": 1428 + }, + { + "epoch": 8.77, + "learning_rate": 7.87076664270795e-07, + "loss": 0.3457, + "step": 1429 + }, + { + "epoch": 8.77, + "learning_rate": 7.793676386821602e-07, + "loss": 0.395, + "step": 1430 + }, + { + "epoch": 8.78, + "learning_rate": 7.716950210722818e-07, + "loss": 0.4409, + "step": 1431 + }, + { + "epoch": 8.79, + "learning_rate": 7.6405884173675e-07, + "loss": 0.4697, + "step": 1432 + }, + { + "epoch": 8.79, + "learning_rate": 7.564591308272773e-07, + "loss": 0.3926, + "step": 1433 + }, + { + "epoch": 8.8, + "learning_rate": 7.488959183515809e-07, + "loss": 0.3809, + "step": 1434 + }, + { + "epoch": 8.8, + "learning_rate": 7.413692341732582e-07, + "loss": 0.3564, + "step": 1435 + }, + { + "epoch": 8.81, + "learning_rate": 7.338791080116792e-07, + "loss": 0.3618, + "step": 1436 + }, + { + "epoch": 8.82, + "learning_rate": 7.264255694418576e-07, + "loss": 0.4092, + "step": 1437 + }, + { + "epoch": 8.82, + "learning_rate": 7.190086478943459e-07, + "loss": 0.4375, + "step": 1438 + }, + { + "epoch": 8.83, + "learning_rate": 7.116283726551077e-07, + "loss": 0.3667, + "step": 1439 + }, + { + "epoch": 8.83, + "learning_rate": 7.042847728654078e-07, + "loss": 0.3511, + "step": 1440 + }, + { + "epoch": 8.84, + "learning_rate": 6.969778775217007e-07, + "loss": 0.3926, + "step": 1441 + }, + { + "epoch": 8.85, + "learning_rate": 6.897077154755094e-07, + "loss": 0.4565, + "step": 1442 + }, + { + "epoch": 8.85, + "learning_rate": 6.824743154333157e-07, + "loss": 0.3608, + "step": 1443 + }, + { + "epoch": 8.86, + "learning_rate": 6.752777059564431e-07, + "loss": 0.4204, + "step": 1444 + }, + { + "epoch": 8.87, + "learning_rate": 6.681179154609463e-07, + "loss": 0.4058, + "step": 1445 + }, + { + "epoch": 8.87, + "learning_rate": 6.609949722175013e-07, + "loss": 0.3936, + "step": 1446 + }, + { + "epoch": 8.88, + "learning_rate": 6.539089043512914e-07, + "loss": 0.4004, + "step": 1447 + }, + { + "epoch": 8.88, + "learning_rate": 6.468597398418952e-07, + "loss": 0.3545, + "step": 1448 + }, + { + "epoch": 8.89, + "learning_rate": 6.398475065231746e-07, + "loss": 0.3264, + "step": 1449 + }, + { + "epoch": 8.9, + "learning_rate": 6.328722320831737e-07, + "loss": 0.3521, + "step": 1450 + }, + { + "epoch": 8.9, + "learning_rate": 6.259339440639966e-07, + "loss": 0.3779, + "step": 1451 + }, + { + "epoch": 8.91, + "learning_rate": 6.1903266986171e-07, + "loss": 0.397, + "step": 1452 + }, + { + "epoch": 8.91, + "learning_rate": 6.121684367262271e-07, + "loss": 0.4111, + "step": 1453 + }, + { + "epoch": 8.92, + "learning_rate": 6.053412717612061e-07, + "loss": 0.373, + "step": 1454 + }, + { + "epoch": 8.93, + "learning_rate": 5.985512019239392e-07, + "loss": 0.4199, + "step": 1455 + }, + { + "epoch": 8.93, + "learning_rate": 5.917982540252442e-07, + "loss": 0.3833, + "step": 1456 + }, + { + "epoch": 8.94, + "learning_rate": 5.850824547293655e-07, + "loss": 0.3838, + "step": 1457 + }, + { + "epoch": 8.94, + "learning_rate": 5.784038305538653e-07, + "loss": 0.4448, + "step": 1458 + }, + { + "epoch": 8.95, + "learning_rate": 5.71762407869515e-07, + "loss": 0.4224, + "step": 1459 + }, + { + "epoch": 8.96, + "learning_rate": 5.651582129001987e-07, + "loss": 0.3784, + "step": 1460 + }, + { + "epoch": 8.96, + "learning_rate": 5.585912717228015e-07, + "loss": 0.3955, + "step": 1461 + }, + { + "epoch": 8.97, + "learning_rate": 5.520616102671128e-07, + "loss": 0.4287, + "step": 1462 + }, + { + "epoch": 8.98, + "learning_rate": 5.455692543157243e-07, + "loss": 0.4048, + "step": 1463 + }, + { + "epoch": 8.98, + "learning_rate": 5.391142295039209e-07, + "loss": 0.4062, + "step": 1464 + }, + { + "epoch": 8.99, + "learning_rate": 5.326965613195867e-07, + "loss": 0.4785, + "step": 1465 + }, + { + "epoch": 8.99, + "learning_rate": 5.263162751031025e-07, + "loss": 0.4512, + "step": 1466 + }, + { + "epoch": 9.0, + "learning_rate": 5.199733960472431e-07, + "loss": 0.416, + "step": 1467 + }, + { + "epoch": 9.01, + "learning_rate": 5.136679491970809e-07, + "loss": 0.3584, + "step": 1468 + }, + { + "epoch": 9.01, + "learning_rate": 5.073999594498869e-07, + "loss": 0.3274, + "step": 1469 + }, + { + "epoch": 9.02, + "learning_rate": 5.011694515550303e-07, + "loss": 0.3901, + "step": 1470 + }, + { + "epoch": 9.02, + "learning_rate": 4.949764501138832e-07, + "loss": 0.3359, + "step": 1471 + }, + { + "epoch": 9.03, + "learning_rate": 4.888209795797205e-07, + "loss": 0.3325, + "step": 1472 + }, + { + "epoch": 9.04, + "learning_rate": 4.827030642576236e-07, + "loss": 0.3188, + "step": 1473 + }, + { + "epoch": 9.04, + "learning_rate": 4.766227283043912e-07, + "loss": 0.3936, + "step": 1474 + }, + { + "epoch": 9.05, + "learning_rate": 4.7057999572843516e-07, + "loss": 0.3057, + "step": 1475 + }, + { + "epoch": 9.06, + "learning_rate": 4.645748903896885e-07, + "loss": 0.3564, + "step": 1476 + }, + { + "epoch": 9.06, + "learning_rate": 4.5860743599951186e-07, + "loss": 0.3252, + "step": 1477 + }, + { + "epoch": 9.07, + "learning_rate": 4.5267765612060253e-07, + "loss": 0.355, + "step": 1478 + }, + { + "epoch": 9.07, + "learning_rate": 4.4678557416689586e-07, + "loss": 0.332, + "step": 1479 + }, + { + "epoch": 9.08, + "learning_rate": 4.4093121340347824e-07, + "loss": 0.3267, + "step": 1480 + }, + { + "epoch": 9.09, + "learning_rate": 4.3511459694648873e-07, + "loss": 0.3574, + "step": 1481 + }, + { + "epoch": 9.09, + "learning_rate": 4.2933574776303664e-07, + "loss": 0.3354, + "step": 1482 + }, + { + "epoch": 9.1, + "learning_rate": 4.235946886711018e-07, + "loss": 0.3193, + "step": 1483 + }, + { + "epoch": 9.1, + "learning_rate": 4.1789144233945087e-07, + "loss": 0.3301, + "step": 1484 + }, + { + "epoch": 9.11, + "learning_rate": 4.122260312875437e-07, + "loss": 0.3311, + "step": 1485 + }, + { + "epoch": 9.12, + "learning_rate": 4.0659847788544926e-07, + "loss": 0.3257, + "step": 1486 + }, + { + "epoch": 9.12, + "learning_rate": 4.010088043537519e-07, + "loss": 0.3389, + "step": 1487 + }, + { + "epoch": 9.13, + "learning_rate": 3.954570327634677e-07, + "loss": 0.3252, + "step": 1488 + }, + { + "epoch": 9.13, + "learning_rate": 3.899431850359503e-07, + "loss": 0.3359, + "step": 1489 + }, + { + "epoch": 9.14, + "learning_rate": 3.8446728294281865e-07, + "loss": 0.3408, + "step": 1490 + }, + { + "epoch": 9.15, + "learning_rate": 3.7902934810585603e-07, + "loss": 0.3555, + "step": 1491 + }, + { + "epoch": 9.15, + "learning_rate": 3.736294019969311e-07, + "loss": 0.3066, + "step": 1492 + }, + { + "epoch": 9.16, + "learning_rate": 3.682674659379137e-07, + "loss": 0.3354, + "step": 1493 + }, + { + "epoch": 9.17, + "learning_rate": 3.629435611005916e-07, + "loss": 0.3721, + "step": 1494 + }, + { + "epoch": 9.17, + "learning_rate": 3.5765770850658244e-07, + "loss": 0.3271, + "step": 1495 + }, + { + "epoch": 9.18, + "learning_rate": 3.5240992902725204e-07, + "loss": 0.2993, + "step": 1496 + }, + { + "epoch": 9.18, + "learning_rate": 3.4720024338363633e-07, + "loss": 0.3398, + "step": 1497 + }, + { + "epoch": 9.19, + "learning_rate": 3.420286721463562e-07, + "loss": 0.3213, + "step": 1498 + }, + { + "epoch": 9.2, + "learning_rate": 3.3689523573553597e-07, + "loss": 0.3203, + "step": 1499 + }, + { + "epoch": 9.2, + "learning_rate": 3.3179995442071956e-07, + "loss": 0.3105, + "step": 1500 + }, + { + "epoch": 9.21, + "learning_rate": 3.2674284832080127e-07, + "loss": 0.3369, + "step": 1501 + }, + { + "epoch": 9.21, + "learning_rate": 3.217239374039338e-07, + "loss": 0.3384, + "step": 1502 + }, + { + "epoch": 9.22, + "learning_rate": 3.1674324148745827e-07, + "loss": 0.2983, + "step": 1503 + }, + { + "epoch": 9.23, + "learning_rate": 3.118007802378198e-07, + "loss": 0.374, + "step": 1504 + }, + { + "epoch": 9.23, + "learning_rate": 3.0689657317049205e-07, + "loss": 0.3257, + "step": 1505 + }, + { + "epoch": 9.24, + "learning_rate": 3.020306396499062e-07, + "loss": 0.3735, + "step": 1506 + }, + { + "epoch": 9.25, + "learning_rate": 2.972029988893621e-07, + "loss": 0.3589, + "step": 1507 + }, + { + "epoch": 9.25, + "learning_rate": 2.9241366995096387e-07, + "loss": 0.2961, + "step": 1508 + }, + { + "epoch": 9.26, + "learning_rate": 2.8766267174553884e-07, + "loss": 0.2913, + "step": 1509 + }, + { + "epoch": 9.26, + "learning_rate": 2.8295002303256546e-07, + "loss": 0.3169, + "step": 1510 + }, + { + "epoch": 9.27, + "learning_rate": 2.7827574242009434e-07, + "loss": 0.355, + "step": 1511 + }, + { + "epoch": 9.28, + "learning_rate": 2.736398483646807e-07, + "loss": 0.3374, + "step": 1512 + }, + { + "epoch": 9.28, + "learning_rate": 2.6904235917131094e-07, + "loss": 0.334, + "step": 1513 + }, + { + "epoch": 9.29, + "learning_rate": 2.64483292993325e-07, + "loss": 0.3369, + "step": 1514 + }, + { + "epoch": 9.29, + "learning_rate": 2.599626678323508e-07, + "loss": 0.3076, + "step": 1515 + }, + { + "epoch": 9.3, + "learning_rate": 2.554805015382289e-07, + "loss": 0.3066, + "step": 1516 + }, + { + "epoch": 9.31, + "learning_rate": 2.5103681180894566e-07, + "loss": 0.3735, + "step": 1517 + }, + { + "epoch": 9.31, + "learning_rate": 2.4663161619055797e-07, + "loss": 0.3203, + "step": 1518 + }, + { + "epoch": 9.32, + "learning_rate": 2.422649320771331e-07, + "loss": 0.2974, + "step": 1519 + }, + { + "epoch": 9.33, + "learning_rate": 2.3793677671066882e-07, + "loss": 0.2905, + "step": 1520 + }, + { + "epoch": 9.33, + "learning_rate": 2.3364716718103143e-07, + "loss": 0.3438, + "step": 1521 + }, + { + "epoch": 9.34, + "learning_rate": 2.293961204258932e-07, + "loss": 0.3091, + "step": 1522 + }, + { + "epoch": 9.34, + "learning_rate": 2.2518365323065284e-07, + "loss": 0.3037, + "step": 1523 + }, + { + "epoch": 9.35, + "learning_rate": 2.2100978222838186e-07, + "loss": 0.4043, + "step": 1524 + }, + { + "epoch": 9.36, + "learning_rate": 2.1687452389974829e-07, + "loss": 0.3203, + "step": 1525 + }, + { + "epoch": 9.36, + "learning_rate": 2.1277789457296306e-07, + "loss": 0.4023, + "step": 1526 + }, + { + "epoch": 9.37, + "learning_rate": 2.0871991042370255e-07, + "loss": 0.3345, + "step": 1527 + }, + { + "epoch": 9.37, + "learning_rate": 2.0470058747505516e-07, + "loss": 0.3618, + "step": 1528 + }, + { + "epoch": 9.38, + "learning_rate": 2.0071994159745367e-07, + "loss": 0.333, + "step": 1529 + }, + { + "epoch": 9.39, + "learning_rate": 1.9677798850861517e-07, + "loss": 0.3579, + "step": 1530 + }, + { + "epoch": 9.39, + "learning_rate": 1.9287474377347238e-07, + "loss": 0.3389, + "step": 1531 + }, + { + "epoch": 9.4, + "learning_rate": 1.8901022280411906e-07, + "loss": 0.292, + "step": 1532 + }, + { + "epoch": 9.4, + "learning_rate": 1.8518444085974697e-07, + "loss": 0.3896, + "step": 1533 + }, + { + "epoch": 9.41, + "learning_rate": 1.8139741304658566e-07, + "loss": 0.3501, + "step": 1534 + }, + { + "epoch": 9.42, + "learning_rate": 1.776491543178438e-07, + "loss": 0.3237, + "step": 1535 + }, + { + "epoch": 9.42, + "learning_rate": 1.739396794736481e-07, + "loss": 0.334, + "step": 1536 + }, + { + "epoch": 9.43, + "learning_rate": 1.7026900316098217e-07, + "loss": 0.332, + "step": 1537 + }, + { + "epoch": 9.44, + "learning_rate": 1.6663713987363882e-07, + "loss": 0.3452, + "step": 1538 + }, + { + "epoch": 9.44, + "learning_rate": 1.6304410395215243e-07, + "loss": 0.3301, + "step": 1539 + }, + { + "epoch": 9.45, + "learning_rate": 1.5948990958374543e-07, + "loss": 0.3374, + "step": 1540 + }, + { + "epoch": 9.45, + "learning_rate": 1.559745708022753e-07, + "loss": 0.2935, + "step": 1541 + }, + { + "epoch": 9.46, + "learning_rate": 1.5249810148817658e-07, + "loss": 0.3643, + "step": 1542 + }, + { + "epoch": 9.47, + "learning_rate": 1.490605153684066e-07, + "loss": 0.3765, + "step": 1543 + }, + { + "epoch": 9.47, + "learning_rate": 1.4566182601638779e-07, + "loss": 0.335, + "step": 1544 + }, + { + "epoch": 9.48, + "learning_rate": 1.4230204685196202e-07, + "loss": 0.3569, + "step": 1545 + }, + { + "epoch": 9.48, + "learning_rate": 1.3898119114133192e-07, + "loss": 0.356, + "step": 1546 + }, + { + "epoch": 9.49, + "learning_rate": 1.3569927199700628e-07, + "loss": 0.3247, + "step": 1547 + }, + { + "epoch": 9.5, + "learning_rate": 1.3245630237775585e-07, + "loss": 0.3125, + "step": 1548 + }, + { + "epoch": 9.5, + "learning_rate": 1.292522950885533e-07, + "loss": 0.3115, + "step": 1549 + }, + { + "epoch": 9.51, + "learning_rate": 1.2608726278053208e-07, + "loss": 0.3647, + "step": 1550 + }, + { + "epoch": 9.52, + "learning_rate": 1.2296121795092874e-07, + "loss": 0.3447, + "step": 1551 + }, + { + "epoch": 9.52, + "learning_rate": 1.1987417294303748e-07, + "loss": 0.3105, + "step": 1552 + }, + { + "epoch": 9.53, + "learning_rate": 1.1682613994615788e-07, + "loss": 0.3765, + "step": 1553 + }, + { + "epoch": 9.53, + "learning_rate": 1.1381713099555381e-07, + "loss": 0.3472, + "step": 1554 + }, + { + "epoch": 9.54, + "learning_rate": 1.1084715797239798e-07, + "loss": 0.2969, + "step": 1555 + }, + { + "epoch": 9.55, + "learning_rate": 1.0791623260372863e-07, + "loss": 0.3467, + "step": 1556 + }, + { + "epoch": 9.55, + "learning_rate": 1.0502436646240399e-07, + "loss": 0.3164, + "step": 1557 + }, + { + "epoch": 9.56, + "learning_rate": 1.0217157096705676e-07, + "loss": 0.3633, + "step": 1558 + }, + { + "epoch": 9.56, + "learning_rate": 9.935785738204417e-08, + "loss": 0.3267, + "step": 1559 + }, + { + "epoch": 9.57, + "learning_rate": 9.658323681741133e-08, + "loss": 0.3037, + "step": 1560 + }, + { + "epoch": 9.58, + "learning_rate": 9.384772022884015e-08, + "loss": 0.3833, + "step": 1561 + }, + { + "epoch": 9.58, + "learning_rate": 9.11513184176116e-08, + "loss": 0.3452, + "step": 1562 + }, + { + "epoch": 9.59, + "learning_rate": 8.8494042030558e-08, + "loss": 0.3096, + "step": 1563 + }, + { + "epoch": 9.6, + "learning_rate": 8.587590156002635e-08, + "loss": 0.3167, + "step": 1564 + }, + { + "epoch": 9.6, + "learning_rate": 8.329690734383278e-08, + "loss": 0.3413, + "step": 1565 + }, + { + "epoch": 9.61, + "learning_rate": 8.075706956522156e-08, + "loss": 0.3936, + "step": 1566 + }, + { + "epoch": 9.61, + "learning_rate": 7.825639825282949e-08, + "loss": 0.3364, + "step": 1567 + }, + { + "epoch": 9.62, + "learning_rate": 7.579490328064265e-08, + "loss": 0.3911, + "step": 1568 + }, + { + "epoch": 9.63, + "learning_rate": 7.33725943679553e-08, + "loss": 0.2969, + "step": 1569 + }, + { + "epoch": 9.63, + "learning_rate": 7.098948107933656e-08, + "loss": 0.3291, + "step": 1570 + }, + { + "epoch": 9.64, + "learning_rate": 6.864557282459162e-08, + "loss": 0.3184, + "step": 1571 + }, + { + "epoch": 9.64, + "learning_rate": 6.634087885871832e-08, + "loss": 0.335, + "step": 1572 + }, + { + "epoch": 9.65, + "learning_rate": 6.407540828188175e-08, + "loss": 0.3523, + "step": 1573 + }, + { + "epoch": 9.66, + "learning_rate": 6.184917003936752e-08, + "loss": 0.2961, + "step": 1574 + }, + { + "epoch": 9.66, + "learning_rate": 5.966217292155296e-08, + "loss": 0.3701, + "step": 1575 + }, + { + "epoch": 9.67, + "learning_rate": 5.7514425563870436e-08, + "loss": 0.3662, + "step": 1576 + }, + { + "epoch": 9.67, + "learning_rate": 5.540593644677295e-08, + "loss": 0.3115, + "step": 1577 + }, + { + "epoch": 9.68, + "learning_rate": 5.333671389569972e-08, + "loss": 0.3164, + "step": 1578 + }, + { + "epoch": 9.69, + "learning_rate": 5.1306766081048456e-08, + "loss": 0.3003, + "step": 1579 + }, + { + "epoch": 9.69, + "learning_rate": 4.931610101813533e-08, + "loss": 0.3164, + "step": 1580 + }, + { + "epoch": 9.7, + "learning_rate": 4.73647265671684e-08, + "loss": 0.3521, + "step": 1581 + }, + { + "epoch": 9.71, + "learning_rate": 4.545265043321645e-08, + "loss": 0.2876, + "step": 1582 + }, + { + "epoch": 9.71, + "learning_rate": 4.357988016617687e-08, + "loss": 0.2947, + "step": 1583 + }, + { + "epoch": 9.72, + "learning_rate": 4.174642316074562e-08, + "loss": 0.3423, + "step": 1584 + }, + { + "epoch": 9.72, + "learning_rate": 3.9952286656389506e-08, + "loss": 0.3438, + "step": 1585 + }, + { + "epoch": 9.73, + "learning_rate": 3.819747773731841e-08, + "loss": 0.3872, + "step": 1586 + }, + { + "epoch": 9.74, + "learning_rate": 3.648200333245422e-08, + "loss": 0.3247, + "step": 1587 + }, + { + "epoch": 9.74, + "learning_rate": 3.480587021540527e-08, + "loss": 0.3091, + "step": 1588 + }, + { + "epoch": 9.75, + "learning_rate": 3.316908500443972e-08, + "loss": 0.3633, + "step": 1589 + }, + { + "epoch": 9.75, + "learning_rate": 3.1571654162461107e-08, + "loss": 0.3281, + "step": 1590 + }, + { + "epoch": 9.76, + "learning_rate": 3.001358399697618e-08, + "loss": 0.3545, + "step": 1591 + }, + { + "epoch": 9.77, + "learning_rate": 2.8494880660080437e-08, + "loss": 0.3472, + "step": 1592 + }, + { + "epoch": 9.77, + "learning_rate": 2.7015550148423718e-08, + "loss": 0.3682, + "step": 1593 + }, + { + "epoch": 9.78, + "learning_rate": 2.557559830319245e-08, + "loss": 0.3105, + "step": 1594 + }, + { + "epoch": 9.79, + "learning_rate": 2.417503081008632e-08, + "loss": 0.3003, + "step": 1595 + }, + { + "epoch": 9.79, + "learning_rate": 2.2813853199292745e-08, + "loss": 0.3608, + "step": 1596 + }, + { + "epoch": 9.8, + "learning_rate": 2.1492070845468005e-08, + "loss": 0.2871, + "step": 1597 + }, + { + "epoch": 9.8, + "learning_rate": 2.0209688967713914e-08, + "loss": 0.3169, + "step": 1598 + }, + { + "epoch": 9.81, + "learning_rate": 1.896671262955896e-08, + "loss": 0.3218, + "step": 1599 + }, + { + "epoch": 9.82, + "learning_rate": 1.7763146738938307e-08, + "loss": 0.332, + "step": 1600 + }, + { + "epoch": 9.82, + "learning_rate": 1.659899604816939e-08, + "loss": 0.3013, + "step": 1601 + }, + { + "epoch": 9.83, + "learning_rate": 1.5474265153944124e-08, + "loss": 0.3262, + "step": 1602 + }, + { + "epoch": 9.83, + "learning_rate": 1.4388958497300043e-08, + "loss": 0.2925, + "step": 1603 + }, + { + "epoch": 9.84, + "learning_rate": 1.3343080363604766e-08, + "loss": 0.314, + "step": 1604 + }, + { + "epoch": 9.85, + "learning_rate": 1.2336634882544885e-08, + "loss": 0.3696, + "step": 1605 + }, + { + "epoch": 9.85, + "learning_rate": 1.1369626028104874e-08, + "loss": 0.3647, + "step": 1606 + }, + { + "epoch": 9.86, + "learning_rate": 1.0442057618551549e-08, + "loss": 0.3306, + "step": 1607 + }, + { + "epoch": 9.87, + "learning_rate": 9.553933316420739e-09, + "loss": 0.3916, + "step": 1608 + }, + { + "epoch": 9.87, + "learning_rate": 8.705256628499525e-09, + "loss": 0.3525, + "step": 1609 + }, + { + "epoch": 9.88, + "learning_rate": 7.896030905818474e-09, + "loss": 0.3662, + "step": 1610 + }, + { + "epoch": 9.88, + "learning_rate": 7.126259343631648e-09, + "loss": 0.3042, + "step": 1611 + }, + { + "epoch": 9.89, + "learning_rate": 6.39594498140883e-09, + "loss": 0.3257, + "step": 1612 + }, + { + "epoch": 9.9, + "learning_rate": 5.705090702819993e-09, + "loss": 0.3237, + "step": 1613 + }, + { + "epoch": 9.9, + "learning_rate": 5.053699235726406e-09, + "loss": 0.29, + "step": 1614 + }, + { + "epoch": 9.91, + "learning_rate": 4.4417731521717576e-09, + "loss": 0.3081, + "step": 1615 + }, + { + "epoch": 9.91, + "learning_rate": 3.869314868363283e-09, + "loss": 0.2944, + "step": 1616 + }, + { + "epoch": 9.92, + "learning_rate": 3.3363266446750918e-09, + "loss": 0.2676, + "step": 1617 + }, + { + "epoch": 9.93, + "learning_rate": 2.842810585627076e-09, + "loss": 0.3086, + "step": 1618 + }, + { + "epoch": 9.93, + "learning_rate": 2.388768639886019e-09, + "loss": 0.3047, + "step": 1619 + }, + { + "epoch": 9.94, + "learning_rate": 1.9742026002500526e-09, + "loss": 0.3242, + "step": 1620 + }, + { + "epoch": 9.94, + "learning_rate": 1.5991141036475478e-09, + "loss": 0.3086, + "step": 1621 + }, + { + "epoch": 9.95, + "learning_rate": 1.263504631129342e-09, + "loss": 0.3174, + "step": 1622 + }, + { + "epoch": 9.96, + "learning_rate": 9.673755078598578e-10, + "loss": 0.354, + "step": 1623 + }, + { + "epoch": 9.96, + "learning_rate": 7.107279031148828e-10, + "loss": 0.3208, + "step": 1624 + }, + { + "epoch": 9.97, + "learning_rate": 4.935628302760175e-10, + "loss": 0.3721, + "step": 1625 + }, + { + "epoch": 9.98, + "learning_rate": 3.158811468273459e-10, + "loss": 0.3354, + "step": 1626 + }, + { + "epoch": 9.98, + "learning_rate": 1.776835543509936e-10, + "loss": 0.3215, + "step": 1627 + }, + { + "epoch": 9.99, + "learning_rate": 7.897059852490785e-11, + "loss": 0.2866, + "step": 1628 + }, + { + "epoch": 9.99, + "learning_rate": 1.9742669119526824e-11, + "loss": 0.3057, + "step": 1629 + }, + { + "epoch": 10.0, + "learning_rate": 0.0, + "loss": 0.3101, + "step": 1630 + }, + { + "epoch": 10.0, + "step": 1630, + "total_flos": 41842376695808.0, + "train_loss": 1.8768900678201688, + "train_runtime": 1493.3467, + "train_samples_per_second": 69.736, + "train_steps_per_second": 1.092 + } + ], + "logging_steps": 1.0, + "max_steps": 1630, + "num_train_epochs": 10, + "save_steps": 10000, + "total_flos": 41842376695808.0, + "trial_name": null, + "trial_params": null +}