diff --git "a/Chinese_tiny_llm-zh_105k_en_13k/trainer_state.json" "b/Chinese_tiny_llm-zh_105k_en_13k/trainer_state.json" new file mode 100644--- /dev/null +++ "b/Chinese_tiny_llm-zh_105k_en_13k/trainer_state.json" @@ -0,0 +1,16653 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 2772, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.4388489208633095e-07, + "loss": 2.3547, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 2.877697841726619e-07, + "loss": 2.3576, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 4.3165467625899287e-07, + "loss": 2.1275, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 5.755395683453238e-07, + "loss": 2.4731, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 7.194244604316547e-07, + "loss": 2.4011, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 8.633093525179857e-07, + "loss": 2.3406, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 1.0071942446043167e-06, + "loss": 2.4751, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 1.1510791366906476e-06, + "loss": 2.145, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 1.2949640287769785e-06, + "loss": 2.7092, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 1.4388489208633094e-06, + "loss": 2.3121, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 1.5827338129496403e-06, + "loss": 2.6237, + "step": 11 + }, + { + "epoch": 0.01, + "learning_rate": 1.7266187050359715e-06, + "loss": 2.3887, + "step": 12 + }, + { + "epoch": 0.01, + "learning_rate": 1.8705035971223024e-06, + "loss": 2.3881, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 2.0143884892086333e-06, + "loss": 2.1992, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 2.158273381294964e-06, + "loss": 2.6365, + "step": 15 + }, + { + "epoch": 0.02, + "learning_rate": 2.302158273381295e-06, + "loss": 2.1901, + "step": 16 + }, + { + "epoch": 0.02, + "learning_rate": 2.4460431654676263e-06, + "loss": 2.2422, + "step": 17 + }, + { + "epoch": 0.02, + "learning_rate": 2.589928057553957e-06, + "loss": 2.2557, + "step": 18 + }, + { + "epoch": 0.02, + "learning_rate": 2.733812949640288e-06, + "loss": 2.3875, + "step": 19 + }, + { + "epoch": 0.02, + "learning_rate": 2.877697841726619e-06, + "loss": 2.3422, + "step": 20 + }, + { + "epoch": 0.02, + "learning_rate": 3.02158273381295e-06, + "loss": 2.1384, + "step": 21 + }, + { + "epoch": 0.02, + "learning_rate": 3.1654676258992807e-06, + "loss": 1.8947, + "step": 22 + }, + { + "epoch": 0.02, + "learning_rate": 3.309352517985612e-06, + "loss": 2.0175, + "step": 23 + }, + { + "epoch": 0.03, + "learning_rate": 3.453237410071943e-06, + "loss": 2.0965, + "step": 24 + }, + { + "epoch": 0.03, + "learning_rate": 3.5971223021582737e-06, + "loss": 2.1806, + "step": 25 + }, + { + "epoch": 0.03, + "learning_rate": 3.741007194244605e-06, + "loss": 2.0644, + "step": 26 + }, + { + "epoch": 0.03, + "learning_rate": 3.884892086330936e-06, + "loss": 1.9973, + "step": 27 + }, + { + "epoch": 0.03, + "learning_rate": 4.028776978417267e-06, + "loss": 1.8138, + "step": 28 + }, + { + "epoch": 0.03, + "learning_rate": 4.172661870503597e-06, + "loss": 2.0755, + "step": 29 + }, + { + "epoch": 0.03, + "learning_rate": 4.316546762589928e-06, + "loss": 1.8519, + "step": 30 + }, + { + "epoch": 0.03, + "learning_rate": 4.46043165467626e-06, + "loss": 2.2144, + "step": 31 + }, + { + "epoch": 0.03, + "learning_rate": 4.60431654676259e-06, + "loss": 1.9688, + "step": 32 + }, + { + "epoch": 0.04, + "learning_rate": 4.748201438848921e-06, + "loss": 2.0193, + "step": 33 + }, + { + "epoch": 0.04, + "learning_rate": 4.892086330935253e-06, + "loss": 1.8964, + "step": 34 + }, + { + "epoch": 0.04, + "learning_rate": 5.035971223021583e-06, + "loss": 2.0533, + "step": 35 + }, + { + "epoch": 0.04, + "learning_rate": 5.179856115107914e-06, + "loss": 2.1172, + "step": 36 + }, + { + "epoch": 0.04, + "learning_rate": 5.3237410071942456e-06, + "loss": 2.1439, + "step": 37 + }, + { + "epoch": 0.04, + "learning_rate": 5.467625899280576e-06, + "loss": 1.8953, + "step": 38 + }, + { + "epoch": 0.04, + "learning_rate": 5.611510791366906e-06, + "loss": 2.0619, + "step": 39 + }, + { + "epoch": 0.04, + "learning_rate": 5.755395683453238e-06, + "loss": 2.0246, + "step": 40 + }, + { + "epoch": 0.04, + "learning_rate": 5.899280575539568e-06, + "loss": 1.9229, + "step": 41 + }, + { + "epoch": 0.05, + "learning_rate": 6.0431654676259e-06, + "loss": 1.9145, + "step": 42 + }, + { + "epoch": 0.05, + "learning_rate": 6.1870503597122315e-06, + "loss": 2.159, + "step": 43 + }, + { + "epoch": 0.05, + "learning_rate": 6.330935251798561e-06, + "loss": 1.91, + "step": 44 + }, + { + "epoch": 0.05, + "learning_rate": 6.474820143884892e-06, + "loss": 1.9358, + "step": 45 + }, + { + "epoch": 0.05, + "learning_rate": 6.618705035971224e-06, + "loss": 1.9698, + "step": 46 + }, + { + "epoch": 0.05, + "learning_rate": 6.762589928057554e-06, + "loss": 1.673, + "step": 47 + }, + { + "epoch": 0.05, + "learning_rate": 6.906474820143886e-06, + "loss": 1.9999, + "step": 48 + }, + { + "epoch": 0.05, + "learning_rate": 7.050359712230216e-06, + "loss": 2.0436, + "step": 49 + }, + { + "epoch": 0.05, + "learning_rate": 7.194244604316547e-06, + "loss": 2.0187, + "step": 50 + }, + { + "epoch": 0.06, + "learning_rate": 7.338129496402878e-06, + "loss": 1.8008, + "step": 51 + }, + { + "epoch": 0.06, + "learning_rate": 7.48201438848921e-06, + "loss": 2.1516, + "step": 52 + }, + { + "epoch": 0.06, + "learning_rate": 7.62589928057554e-06, + "loss": 1.8388, + "step": 53 + }, + { + "epoch": 0.06, + "learning_rate": 7.769784172661872e-06, + "loss": 1.9828, + "step": 54 + }, + { + "epoch": 0.06, + "learning_rate": 7.913669064748202e-06, + "loss": 1.7816, + "step": 55 + }, + { + "epoch": 0.06, + "learning_rate": 8.057553956834533e-06, + "loss": 1.8797, + "step": 56 + }, + { + "epoch": 0.06, + "learning_rate": 8.201438848920865e-06, + "loss": 1.8773, + "step": 57 + }, + { + "epoch": 0.06, + "learning_rate": 8.345323741007195e-06, + "loss": 1.8915, + "step": 58 + }, + { + "epoch": 0.06, + "learning_rate": 8.489208633093526e-06, + "loss": 1.8354, + "step": 59 + }, + { + "epoch": 0.06, + "learning_rate": 8.633093525179856e-06, + "loss": 1.9031, + "step": 60 + }, + { + "epoch": 0.07, + "learning_rate": 8.776978417266188e-06, + "loss": 1.8754, + "step": 61 + }, + { + "epoch": 0.07, + "learning_rate": 8.92086330935252e-06, + "loss": 1.772, + "step": 62 + }, + { + "epoch": 0.07, + "learning_rate": 9.064748201438849e-06, + "loss": 1.8173, + "step": 63 + }, + { + "epoch": 0.07, + "learning_rate": 9.20863309352518e-06, + "loss": 1.8111, + "step": 64 + }, + { + "epoch": 0.07, + "learning_rate": 9.35251798561151e-06, + "loss": 1.8438, + "step": 65 + }, + { + "epoch": 0.07, + "learning_rate": 9.496402877697842e-06, + "loss": 1.5675, + "step": 66 + }, + { + "epoch": 0.07, + "learning_rate": 9.640287769784174e-06, + "loss": 1.9983, + "step": 67 + }, + { + "epoch": 0.07, + "learning_rate": 9.784172661870505e-06, + "loss": 1.8764, + "step": 68 + }, + { + "epoch": 0.07, + "learning_rate": 9.928057553956835e-06, + "loss": 1.9815, + "step": 69 + }, + { + "epoch": 0.08, + "learning_rate": 1.0071942446043167e-05, + "loss": 1.9521, + "step": 70 + }, + { + "epoch": 0.08, + "learning_rate": 1.0215827338129498e-05, + "loss": 1.7376, + "step": 71 + }, + { + "epoch": 0.08, + "learning_rate": 1.0359712230215828e-05, + "loss": 1.9935, + "step": 72 + }, + { + "epoch": 0.08, + "learning_rate": 1.0503597122302158e-05, + "loss": 2.1314, + "step": 73 + }, + { + "epoch": 0.08, + "learning_rate": 1.0647482014388491e-05, + "loss": 1.8567, + "step": 74 + }, + { + "epoch": 0.08, + "learning_rate": 1.0791366906474821e-05, + "loss": 2.1048, + "step": 75 + }, + { + "epoch": 0.08, + "learning_rate": 1.0935251798561153e-05, + "loss": 2.0353, + "step": 76 + }, + { + "epoch": 0.08, + "learning_rate": 1.1079136690647482e-05, + "loss": 1.8824, + "step": 77 + }, + { + "epoch": 0.08, + "learning_rate": 1.1223021582733812e-05, + "loss": 2.0055, + "step": 78 + }, + { + "epoch": 0.09, + "learning_rate": 1.1366906474820146e-05, + "loss": 1.8171, + "step": 79 + }, + { + "epoch": 0.09, + "learning_rate": 1.1510791366906475e-05, + "loss": 1.9234, + "step": 80 + }, + { + "epoch": 0.09, + "learning_rate": 1.1654676258992807e-05, + "loss": 1.8208, + "step": 81 + }, + { + "epoch": 0.09, + "learning_rate": 1.1798561151079137e-05, + "loss": 1.7002, + "step": 82 + }, + { + "epoch": 0.09, + "learning_rate": 1.1942446043165468e-05, + "loss": 1.907, + "step": 83 + }, + { + "epoch": 0.09, + "learning_rate": 1.20863309352518e-05, + "loss": 1.9767, + "step": 84 + }, + { + "epoch": 0.09, + "learning_rate": 1.223021582733813e-05, + "loss": 1.7917, + "step": 85 + }, + { + "epoch": 0.09, + "learning_rate": 1.2374100719424463e-05, + "loss": 1.6337, + "step": 86 + }, + { + "epoch": 0.09, + "learning_rate": 1.2517985611510793e-05, + "loss": 2.0116, + "step": 87 + }, + { + "epoch": 0.1, + "learning_rate": 1.2661870503597123e-05, + "loss": 2.0588, + "step": 88 + }, + { + "epoch": 0.1, + "learning_rate": 1.2805755395683454e-05, + "loss": 1.5452, + "step": 89 + }, + { + "epoch": 0.1, + "learning_rate": 1.2949640287769784e-05, + "loss": 1.8182, + "step": 90 + }, + { + "epoch": 0.1, + "learning_rate": 1.3093525179856117e-05, + "loss": 2.0173, + "step": 91 + }, + { + "epoch": 0.1, + "learning_rate": 1.3237410071942447e-05, + "loss": 1.5917, + "step": 92 + }, + { + "epoch": 0.1, + "learning_rate": 1.3381294964028777e-05, + "loss": 1.845, + "step": 93 + }, + { + "epoch": 0.1, + "learning_rate": 1.3525179856115109e-05, + "loss": 1.9642, + "step": 94 + }, + { + "epoch": 0.1, + "learning_rate": 1.3669064748201439e-05, + "loss": 2.0804, + "step": 95 + }, + { + "epoch": 0.1, + "learning_rate": 1.3812949640287772e-05, + "loss": 1.8469, + "step": 96 + }, + { + "epoch": 0.1, + "learning_rate": 1.3956834532374102e-05, + "loss": 2.0403, + "step": 97 + }, + { + "epoch": 0.11, + "learning_rate": 1.4100719424460432e-05, + "loss": 2.1303, + "step": 98 + }, + { + "epoch": 0.11, + "learning_rate": 1.4244604316546765e-05, + "loss": 2.1525, + "step": 99 + }, + { + "epoch": 0.11, + "learning_rate": 1.4388489208633095e-05, + "loss": 1.6616, + "step": 100 + }, + { + "epoch": 0.11, + "learning_rate": 1.4532374100719426e-05, + "loss": 2.0293, + "step": 101 + }, + { + "epoch": 0.11, + "learning_rate": 1.4676258992805756e-05, + "loss": 2.0534, + "step": 102 + }, + { + "epoch": 0.11, + "learning_rate": 1.4820143884892086e-05, + "loss": 2.0292, + "step": 103 + }, + { + "epoch": 0.11, + "learning_rate": 1.496402877697842e-05, + "loss": 1.8536, + "step": 104 + }, + { + "epoch": 0.11, + "learning_rate": 1.5107913669064749e-05, + "loss": 1.8581, + "step": 105 + }, + { + "epoch": 0.11, + "learning_rate": 1.525179856115108e-05, + "loss": 1.6334, + "step": 106 + }, + { + "epoch": 0.12, + "learning_rate": 1.5395683453237412e-05, + "loss": 1.8721, + "step": 107 + }, + { + "epoch": 0.12, + "learning_rate": 1.5539568345323744e-05, + "loss": 1.8919, + "step": 108 + }, + { + "epoch": 0.12, + "learning_rate": 1.5683453237410072e-05, + "loss": 1.7582, + "step": 109 + }, + { + "epoch": 0.12, + "learning_rate": 1.5827338129496403e-05, + "loss": 1.7654, + "step": 110 + }, + { + "epoch": 0.12, + "learning_rate": 1.5971223021582735e-05, + "loss": 1.9529, + "step": 111 + }, + { + "epoch": 0.12, + "learning_rate": 1.6115107913669067e-05, + "loss": 2.0011, + "step": 112 + }, + { + "epoch": 0.12, + "learning_rate": 1.6258992805755398e-05, + "loss": 1.7407, + "step": 113 + }, + { + "epoch": 0.12, + "learning_rate": 1.640287769784173e-05, + "loss": 2.0088, + "step": 114 + }, + { + "epoch": 0.12, + "learning_rate": 1.6546762589928058e-05, + "loss": 2.0356, + "step": 115 + }, + { + "epoch": 0.13, + "learning_rate": 1.669064748201439e-05, + "loss": 1.9327, + "step": 116 + }, + { + "epoch": 0.13, + "learning_rate": 1.683453237410072e-05, + "loss": 2.0276, + "step": 117 + }, + { + "epoch": 0.13, + "learning_rate": 1.6978417266187053e-05, + "loss": 1.7289, + "step": 118 + }, + { + "epoch": 0.13, + "learning_rate": 1.7122302158273384e-05, + "loss": 2.1943, + "step": 119 + }, + { + "epoch": 0.13, + "learning_rate": 1.7266187050359712e-05, + "loss": 1.8819, + "step": 120 + }, + { + "epoch": 0.13, + "learning_rate": 1.7410071942446044e-05, + "loss": 2.142, + "step": 121 + }, + { + "epoch": 0.13, + "learning_rate": 1.7553956834532375e-05, + "loss": 1.9633, + "step": 122 + }, + { + "epoch": 0.13, + "learning_rate": 1.7697841726618707e-05, + "loss": 1.9181, + "step": 123 + }, + { + "epoch": 0.13, + "learning_rate": 1.784172661870504e-05, + "loss": 1.9825, + "step": 124 + }, + { + "epoch": 0.14, + "learning_rate": 1.7985611510791367e-05, + "loss": 1.9971, + "step": 125 + }, + { + "epoch": 0.14, + "learning_rate": 1.8129496402877698e-05, + "loss": 1.9008, + "step": 126 + }, + { + "epoch": 0.14, + "learning_rate": 1.827338129496403e-05, + "loss": 2.1904, + "step": 127 + }, + { + "epoch": 0.14, + "learning_rate": 1.841726618705036e-05, + "loss": 2.0583, + "step": 128 + }, + { + "epoch": 0.14, + "learning_rate": 1.8561151079136693e-05, + "loss": 2.1272, + "step": 129 + }, + { + "epoch": 0.14, + "learning_rate": 1.870503597122302e-05, + "loss": 2.0181, + "step": 130 + }, + { + "epoch": 0.14, + "learning_rate": 1.8848920863309356e-05, + "loss": 1.873, + "step": 131 + }, + { + "epoch": 0.14, + "learning_rate": 1.8992805755395684e-05, + "loss": 1.8865, + "step": 132 + }, + { + "epoch": 0.14, + "learning_rate": 1.9136690647482016e-05, + "loss": 1.7558, + "step": 133 + }, + { + "epoch": 0.15, + "learning_rate": 1.9280575539568347e-05, + "loss": 1.9659, + "step": 134 + }, + { + "epoch": 0.15, + "learning_rate": 1.9424460431654675e-05, + "loss": 1.8793, + "step": 135 + }, + { + "epoch": 0.15, + "learning_rate": 1.956834532374101e-05, + "loss": 1.7461, + "step": 136 + }, + { + "epoch": 0.15, + "learning_rate": 1.971223021582734e-05, + "loss": 1.9957, + "step": 137 + }, + { + "epoch": 0.15, + "learning_rate": 1.985611510791367e-05, + "loss": 1.8534, + "step": 138 + }, + { + "epoch": 0.15, + "learning_rate": 2e-05, + "loss": 1.8629, + "step": 139 + }, + { + "epoch": 0.15, + "learning_rate": 1.9999992881836162e-05, + "loss": 1.8998, + "step": 140 + }, + { + "epoch": 0.15, + "learning_rate": 1.9999971527354777e-05, + "loss": 1.869, + "step": 141 + }, + { + "epoch": 0.15, + "learning_rate": 1.9999935936586245e-05, + "loss": 1.951, + "step": 142 + }, + { + "epoch": 0.15, + "learning_rate": 1.999988610958124e-05, + "loss": 1.9328, + "step": 143 + }, + { + "epoch": 0.16, + "learning_rate": 1.999982204641069e-05, + "loss": 1.6579, + "step": 144 + }, + { + "epoch": 0.16, + "learning_rate": 1.9999743747165806e-05, + "loss": 1.817, + "step": 145 + }, + { + "epoch": 0.16, + "learning_rate": 1.9999651211958052e-05, + "loss": 1.6378, + "step": 146 + }, + { + "epoch": 0.16, + "learning_rate": 1.9999544440919166e-05, + "loss": 1.863, + "step": 147 + }, + { + "epoch": 0.16, + "learning_rate": 1.9999423434201147e-05, + "loss": 1.8736, + "step": 148 + }, + { + "epoch": 0.16, + "learning_rate": 1.9999288191976264e-05, + "loss": 1.9776, + "step": 149 + }, + { + "epoch": 0.16, + "learning_rate": 1.999913871443706e-05, + "loss": 1.9897, + "step": 150 + }, + { + "epoch": 0.16, + "learning_rate": 1.9998975001796328e-05, + "loss": 1.8387, + "step": 151 + }, + { + "epoch": 0.16, + "learning_rate": 1.999879705428714e-05, + "loss": 1.6932, + "step": 152 + }, + { + "epoch": 0.17, + "learning_rate": 1.9998604872162825e-05, + "loss": 2.2017, + "step": 153 + }, + { + "epoch": 0.17, + "learning_rate": 1.9998398455696983e-05, + "loss": 1.8806, + "step": 154 + }, + { + "epoch": 0.17, + "learning_rate": 1.9998177805183467e-05, + "loss": 1.7912, + "step": 155 + }, + { + "epoch": 0.17, + "learning_rate": 1.999794292093641e-05, + "loss": 1.6728, + "step": 156 + }, + { + "epoch": 0.17, + "learning_rate": 1.99976938032902e-05, + "loss": 2.026, + "step": 157 + }, + { + "epoch": 0.17, + "learning_rate": 1.999743045259949e-05, + "loss": 2.1211, + "step": 158 + }, + { + "epoch": 0.17, + "learning_rate": 1.999715286923919e-05, + "loss": 1.8448, + "step": 159 + }, + { + "epoch": 0.17, + "learning_rate": 1.999686105360448e-05, + "loss": 2.0976, + "step": 160 + }, + { + "epoch": 0.17, + "learning_rate": 1.9996555006110802e-05, + "loss": 1.9568, + "step": 161 + }, + { + "epoch": 0.18, + "learning_rate": 1.9996234727193845e-05, + "loss": 1.7825, + "step": 162 + }, + { + "epoch": 0.18, + "learning_rate": 1.9995900217309575e-05, + "loss": 1.7175, + "step": 163 + }, + { + "epoch": 0.18, + "learning_rate": 1.999555147693421e-05, + "loss": 1.6861, + "step": 164 + }, + { + "epoch": 0.18, + "learning_rate": 1.9995188506564234e-05, + "loss": 2.016, + "step": 165 + }, + { + "epoch": 0.18, + "learning_rate": 1.9994811306716374e-05, + "loss": 1.9465, + "step": 166 + }, + { + "epoch": 0.18, + "learning_rate": 1.9994419877927632e-05, + "loss": 2.0717, + "step": 167 + }, + { + "epoch": 0.18, + "learning_rate": 1.9994014220755255e-05, + "loss": 2.1168, + "step": 168 + }, + { + "epoch": 0.18, + "learning_rate": 1.999359433577675e-05, + "loss": 1.8352, + "step": 169 + }, + { + "epoch": 0.18, + "learning_rate": 1.9993160223589875e-05, + "loss": 1.8891, + "step": 170 + }, + { + "epoch": 0.19, + "learning_rate": 1.9992711884812656e-05, + "loss": 1.6747, + "step": 171 + }, + { + "epoch": 0.19, + "learning_rate": 1.9992249320083355e-05, + "loss": 2.0694, + "step": 172 + }, + { + "epoch": 0.19, + "learning_rate": 1.9991772530060497e-05, + "loss": 2.0438, + "step": 173 + }, + { + "epoch": 0.19, + "learning_rate": 1.999128151542286e-05, + "loss": 1.9977, + "step": 174 + }, + { + "epoch": 0.19, + "learning_rate": 1.9990776276869456e-05, + "loss": 1.6287, + "step": 175 + }, + { + "epoch": 0.19, + "learning_rate": 1.9990256815119572e-05, + "loss": 1.8438, + "step": 176 + }, + { + "epoch": 0.19, + "learning_rate": 1.9989723130912722e-05, + "loss": 1.7703, + "step": 177 + }, + { + "epoch": 0.19, + "learning_rate": 1.9989175225008685e-05, + "loss": 1.8473, + "step": 178 + }, + { + "epoch": 0.19, + "learning_rate": 1.9988613098187476e-05, + "loss": 1.8733, + "step": 179 + }, + { + "epoch": 0.19, + "learning_rate": 1.9988036751249348e-05, + "loss": 1.8972, + "step": 180 + }, + { + "epoch": 0.2, + "learning_rate": 1.9987446185014815e-05, + "loss": 2.0464, + "step": 181 + }, + { + "epoch": 0.2, + "learning_rate": 1.9986841400324625e-05, + "loss": 1.7597, + "step": 182 + }, + { + "epoch": 0.2, + "learning_rate": 1.998622239803977e-05, + "loss": 1.8031, + "step": 183 + }, + { + "epoch": 0.2, + "learning_rate": 1.9985589179041485e-05, + "loss": 2.103, + "step": 184 + }, + { + "epoch": 0.2, + "learning_rate": 1.9984941744231233e-05, + "loss": 1.9884, + "step": 185 + }, + { + "epoch": 0.2, + "learning_rate": 1.998428009453073e-05, + "loss": 1.8569, + "step": 186 + }, + { + "epoch": 0.2, + "learning_rate": 1.998360423088192e-05, + "loss": 2.0007, + "step": 187 + }, + { + "epoch": 0.2, + "learning_rate": 1.9982914154246986e-05, + "loss": 1.7048, + "step": 188 + }, + { + "epoch": 0.2, + "learning_rate": 1.998220986560834e-05, + "loss": 2.0295, + "step": 189 + }, + { + "epoch": 0.21, + "learning_rate": 1.9981491365968635e-05, + "loss": 1.6678, + "step": 190 + }, + { + "epoch": 0.21, + "learning_rate": 1.998075865635075e-05, + "loss": 1.801, + "step": 191 + }, + { + "epoch": 0.21, + "learning_rate": 1.998001173779779e-05, + "loss": 1.7313, + "step": 192 + }, + { + "epoch": 0.21, + "learning_rate": 1.99792506113731e-05, + "loss": 1.8665, + "step": 193 + }, + { + "epoch": 0.21, + "learning_rate": 1.9978475278160236e-05, + "loss": 1.8131, + "step": 194 + }, + { + "epoch": 0.21, + "learning_rate": 1.9977685739262996e-05, + "loss": 1.9217, + "step": 195 + }, + { + "epoch": 0.21, + "learning_rate": 1.9976881995805387e-05, + "loss": 1.9435, + "step": 196 + }, + { + "epoch": 0.21, + "learning_rate": 1.9976064048931648e-05, + "loss": 2.0235, + "step": 197 + }, + { + "epoch": 0.21, + "learning_rate": 1.9975231899806233e-05, + "loss": 1.8873, + "step": 198 + }, + { + "epoch": 0.22, + "learning_rate": 1.997438554961382e-05, + "loss": 2.0316, + "step": 199 + }, + { + "epoch": 0.22, + "learning_rate": 1.9973524999559295e-05, + "loss": 1.7718, + "step": 200 + }, + { + "epoch": 0.22, + "learning_rate": 1.9972650250867773e-05, + "loss": 2.0169, + "step": 201 + }, + { + "epoch": 0.22, + "learning_rate": 1.997176130478457e-05, + "loss": 1.9306, + "step": 202 + }, + { + "epoch": 0.22, + "learning_rate": 1.997085816257522e-05, + "loss": 1.9852, + "step": 203 + }, + { + "epoch": 0.22, + "learning_rate": 1.9969940825525462e-05, + "loss": 1.8372, + "step": 204 + }, + { + "epoch": 0.22, + "learning_rate": 1.9969009294941252e-05, + "loss": 1.8715, + "step": 205 + }, + { + "epoch": 0.22, + "learning_rate": 1.9968063572148748e-05, + "loss": 1.863, + "step": 206 + }, + { + "epoch": 0.22, + "learning_rate": 1.9967103658494308e-05, + "loss": 1.8807, + "step": 207 + }, + { + "epoch": 0.23, + "learning_rate": 1.99661295553445e-05, + "loss": 1.9153, + "step": 208 + }, + { + "epoch": 0.23, + "learning_rate": 1.9965141264086084e-05, + "loss": 2.0213, + "step": 209 + }, + { + "epoch": 0.23, + "learning_rate": 1.996413878612603e-05, + "loss": 1.707, + "step": 210 + }, + { + "epoch": 0.23, + "learning_rate": 1.99631221228915e-05, + "loss": 1.8745, + "step": 211 + }, + { + "epoch": 0.23, + "learning_rate": 1.9962091275829844e-05, + "loss": 1.7234, + "step": 212 + }, + { + "epoch": 0.23, + "learning_rate": 1.9961046246408603e-05, + "loss": 1.8484, + "step": 213 + }, + { + "epoch": 0.23, + "learning_rate": 1.995998703611553e-05, + "loss": 1.7754, + "step": 214 + }, + { + "epoch": 0.23, + "learning_rate": 1.9958913646458546e-05, + "loss": 2.0296, + "step": 215 + }, + { + "epoch": 0.23, + "learning_rate": 1.9957826078965756e-05, + "loss": 1.5189, + "step": 216 + }, + { + "epoch": 0.23, + "learning_rate": 1.9956724335185466e-05, + "loss": 1.4644, + "step": 217 + }, + { + "epoch": 0.24, + "learning_rate": 1.9955608416686147e-05, + "loss": 1.9898, + "step": 218 + }, + { + "epoch": 0.24, + "learning_rate": 1.995447832505647e-05, + "loss": 2.1078, + "step": 219 + }, + { + "epoch": 0.24, + "learning_rate": 1.9953334061905254e-05, + "loss": 1.7983, + "step": 220 + }, + { + "epoch": 0.24, + "learning_rate": 1.995217562886152e-05, + "loss": 1.7168, + "step": 221 + }, + { + "epoch": 0.24, + "learning_rate": 1.995100302757445e-05, + "loss": 1.8447, + "step": 222 + }, + { + "epoch": 0.24, + "learning_rate": 1.9949816259713394e-05, + "loss": 1.9374, + "step": 223 + }, + { + "epoch": 0.24, + "learning_rate": 1.994861532696788e-05, + "loss": 2.0859, + "step": 224 + }, + { + "epoch": 0.24, + "learning_rate": 1.9947400231047584e-05, + "loss": 1.8303, + "step": 225 + }, + { + "epoch": 0.24, + "learning_rate": 1.9946170973682367e-05, + "loss": 1.5848, + "step": 226 + }, + { + "epoch": 0.25, + "learning_rate": 1.9944927556622236e-05, + "loss": 1.8136, + "step": 227 + }, + { + "epoch": 0.25, + "learning_rate": 1.9943669981637367e-05, + "loss": 1.7803, + "step": 228 + }, + { + "epoch": 0.25, + "learning_rate": 1.994239825051807e-05, + "loss": 1.9902, + "step": 229 + }, + { + "epoch": 0.25, + "learning_rate": 1.9941112365074836e-05, + "loss": 1.7615, + "step": 230 + }, + { + "epoch": 0.25, + "learning_rate": 1.993981232713829e-05, + "loss": 2.0408, + "step": 231 + }, + { + "epoch": 0.25, + "learning_rate": 1.9938498138559205e-05, + "loss": 1.6415, + "step": 232 + }, + { + "epoch": 0.25, + "learning_rate": 1.993716980120851e-05, + "loss": 1.8852, + "step": 233 + }, + { + "epoch": 0.25, + "learning_rate": 1.9935827316977266e-05, + "loss": 1.9254, + "step": 234 + }, + { + "epoch": 0.25, + "learning_rate": 1.9934470687776674e-05, + "loss": 1.9132, + "step": 235 + }, + { + "epoch": 0.26, + "learning_rate": 1.993309991553808e-05, + "loss": 1.8581, + "step": 236 + }, + { + "epoch": 0.26, + "learning_rate": 1.993171500221296e-05, + "loss": 1.8959, + "step": 237 + }, + { + "epoch": 0.26, + "learning_rate": 1.993031594977292e-05, + "loss": 1.8841, + "step": 238 + }, + { + "epoch": 0.26, + "learning_rate": 1.99289027602097e-05, + "loss": 1.8922, + "step": 239 + }, + { + "epoch": 0.26, + "learning_rate": 1.9927475435535156e-05, + "loss": 1.66, + "step": 240 + }, + { + "epoch": 0.26, + "learning_rate": 1.9926033977781284e-05, + "loss": 2.0959, + "step": 241 + }, + { + "epoch": 0.26, + "learning_rate": 1.9924578389000185e-05, + "loss": 1.8981, + "step": 242 + }, + { + "epoch": 0.26, + "learning_rate": 1.992310867126408e-05, + "loss": 2.079, + "step": 243 + }, + { + "epoch": 0.26, + "learning_rate": 1.9921624826665316e-05, + "loss": 1.896, + "step": 244 + }, + { + "epoch": 0.27, + "learning_rate": 1.9920126857316334e-05, + "loss": 2.0524, + "step": 245 + }, + { + "epoch": 0.27, + "learning_rate": 1.99186147653497e-05, + "loss": 1.7147, + "step": 246 + }, + { + "epoch": 0.27, + "learning_rate": 1.991708855291807e-05, + "loss": 1.8989, + "step": 247 + }, + { + "epoch": 0.27, + "learning_rate": 1.9915548222194215e-05, + "loss": 1.6191, + "step": 248 + }, + { + "epoch": 0.27, + "learning_rate": 1.9913993775371e-05, + "loss": 1.8722, + "step": 249 + }, + { + "epoch": 0.27, + "learning_rate": 1.9912425214661386e-05, + "loss": 1.8121, + "step": 250 + }, + { + "epoch": 0.27, + "learning_rate": 1.9910842542298425e-05, + "loss": 1.8755, + "step": 251 + }, + { + "epoch": 0.27, + "learning_rate": 1.9909245760535263e-05, + "loss": 2.0044, + "step": 252 + }, + { + "epoch": 0.27, + "learning_rate": 1.990763487164513e-05, + "loss": 1.7097, + "step": 253 + }, + { + "epoch": 0.27, + "learning_rate": 1.9906009877921345e-05, + "loss": 2.0125, + "step": 254 + }, + { + "epoch": 0.28, + "learning_rate": 1.9904370781677294e-05, + "loss": 1.9329, + "step": 255 + }, + { + "epoch": 0.28, + "learning_rate": 1.9902717585246456e-05, + "loss": 2.0188, + "step": 256 + }, + { + "epoch": 0.28, + "learning_rate": 1.990105029098237e-05, + "loss": 1.8302, + "step": 257 + }, + { + "epoch": 0.28, + "learning_rate": 1.9899368901258652e-05, + "loss": 1.9421, + "step": 258 + }, + { + "epoch": 0.28, + "learning_rate": 1.989767341846899e-05, + "loss": 1.952, + "step": 259 + }, + { + "epoch": 0.28, + "learning_rate": 1.989596384502712e-05, + "loss": 1.7265, + "step": 260 + }, + { + "epoch": 0.28, + "learning_rate": 1.9894240183366847e-05, + "loss": 2.014, + "step": 261 + }, + { + "epoch": 0.28, + "learning_rate": 1.989250243594204e-05, + "loss": 1.7925, + "step": 262 + }, + { + "epoch": 0.28, + "learning_rate": 1.9890750605226606e-05, + "loss": 1.9085, + "step": 263 + }, + { + "epoch": 0.29, + "learning_rate": 1.988898469371451e-05, + "loss": 1.7916, + "step": 264 + }, + { + "epoch": 0.29, + "learning_rate": 1.9887204703919764e-05, + "loss": 1.5943, + "step": 265 + }, + { + "epoch": 0.29, + "learning_rate": 1.9885410638376418e-05, + "loss": 2.0056, + "step": 266 + }, + { + "epoch": 0.29, + "learning_rate": 1.9883602499638565e-05, + "loss": 1.7821, + "step": 267 + }, + { + "epoch": 0.29, + "learning_rate": 1.9881780290280327e-05, + "loss": 2.0791, + "step": 268 + }, + { + "epoch": 0.29, + "learning_rate": 1.9879944012895863e-05, + "loss": 1.8741, + "step": 269 + }, + { + "epoch": 0.29, + "learning_rate": 1.9878093670099355e-05, + "loss": 1.9063, + "step": 270 + }, + { + "epoch": 0.29, + "learning_rate": 1.9876229264525012e-05, + "loss": 1.9244, + "step": 271 + }, + { + "epoch": 0.29, + "learning_rate": 1.987435079882707e-05, + "loss": 1.9378, + "step": 272 + }, + { + "epoch": 0.3, + "learning_rate": 1.9872458275679766e-05, + "loss": 1.8986, + "step": 273 + }, + { + "epoch": 0.3, + "learning_rate": 1.987055169777736e-05, + "loss": 1.8158, + "step": 274 + }, + { + "epoch": 0.3, + "learning_rate": 1.986863106783412e-05, + "loss": 1.4929, + "step": 275 + }, + { + "epoch": 0.3, + "learning_rate": 1.9866696388584316e-05, + "loss": 1.8164, + "step": 276 + }, + { + "epoch": 0.3, + "learning_rate": 1.9864747662782226e-05, + "loss": 1.6363, + "step": 277 + }, + { + "epoch": 0.3, + "learning_rate": 1.9862784893202113e-05, + "loss": 1.7578, + "step": 278 + }, + { + "epoch": 0.3, + "learning_rate": 1.9860808082638245e-05, + "loss": 1.8658, + "step": 279 + }, + { + "epoch": 0.3, + "learning_rate": 1.9858817233904872e-05, + "loss": 1.6387, + "step": 280 + }, + { + "epoch": 0.3, + "learning_rate": 1.985681234983623e-05, + "loss": 1.7362, + "step": 281 + }, + { + "epoch": 0.31, + "learning_rate": 1.9854793433286544e-05, + "loss": 1.8246, + "step": 282 + }, + { + "epoch": 0.31, + "learning_rate": 1.9852760487129998e-05, + "loss": 1.7408, + "step": 283 + }, + { + "epoch": 0.31, + "learning_rate": 1.9850713514260772e-05, + "loss": 1.8302, + "step": 284 + }, + { + "epoch": 0.31, + "learning_rate": 1.9848652517593005e-05, + "loss": 1.7418, + "step": 285 + }, + { + "epoch": 0.31, + "learning_rate": 1.9846577500060786e-05, + "loss": 1.802, + "step": 286 + }, + { + "epoch": 0.31, + "learning_rate": 1.9844488464618192e-05, + "loss": 1.6638, + "step": 287 + }, + { + "epoch": 0.31, + "learning_rate": 1.984238541423923e-05, + "loss": 1.9796, + "step": 288 + }, + { + "epoch": 0.31, + "learning_rate": 1.9840268351917883e-05, + "loss": 2.012, + "step": 289 + }, + { + "epoch": 0.31, + "learning_rate": 1.983813728066806e-05, + "loss": 1.7997, + "step": 290 + }, + { + "epoch": 0.31, + "learning_rate": 1.9835992203523636e-05, + "loss": 1.7265, + "step": 291 + }, + { + "epoch": 0.32, + "learning_rate": 1.98338331235384e-05, + "loss": 1.9192, + "step": 292 + }, + { + "epoch": 0.32, + "learning_rate": 1.9831660043786097e-05, + "loss": 1.72, + "step": 293 + }, + { + "epoch": 0.32, + "learning_rate": 1.982947296736039e-05, + "loss": 1.984, + "step": 294 + }, + { + "epoch": 0.32, + "learning_rate": 1.9827271897374883e-05, + "loss": 1.6382, + "step": 295 + }, + { + "epoch": 0.32, + "learning_rate": 1.9825056836963078e-05, + "loss": 1.8195, + "step": 296 + }, + { + "epoch": 0.32, + "learning_rate": 1.9822827789278412e-05, + "loss": 1.9058, + "step": 297 + }, + { + "epoch": 0.32, + "learning_rate": 1.9820584757494234e-05, + "loss": 1.8352, + "step": 298 + }, + { + "epoch": 0.32, + "learning_rate": 1.9818327744803796e-05, + "loss": 1.6767, + "step": 299 + }, + { + "epoch": 0.32, + "learning_rate": 1.9816056754420253e-05, + "loss": 1.7036, + "step": 300 + }, + { + "epoch": 0.33, + "learning_rate": 1.9813771789576663e-05, + "loss": 1.9956, + "step": 301 + }, + { + "epoch": 0.33, + "learning_rate": 1.9811472853525978e-05, + "loss": 2.1392, + "step": 302 + }, + { + "epoch": 0.33, + "learning_rate": 1.980915994954103e-05, + "loss": 1.9096, + "step": 303 + }, + { + "epoch": 0.33, + "learning_rate": 1.980683308091456e-05, + "loss": 1.7885, + "step": 304 + }, + { + "epoch": 0.33, + "learning_rate": 1.9804492250959164e-05, + "loss": 1.8036, + "step": 305 + }, + { + "epoch": 0.33, + "learning_rate": 1.9802137463007327e-05, + "loss": 1.7883, + "step": 306 + }, + { + "epoch": 0.33, + "learning_rate": 1.97997687204114e-05, + "loss": 1.9368, + "step": 307 + }, + { + "epoch": 0.33, + "learning_rate": 1.9797386026543607e-05, + "loss": 1.8897, + "step": 308 + }, + { + "epoch": 0.33, + "learning_rate": 1.9794989384796025e-05, + "loss": 1.8347, + "step": 309 + }, + { + "epoch": 0.34, + "learning_rate": 1.9792578798580597e-05, + "loss": 1.9128, + "step": 310 + }, + { + "epoch": 0.34, + "learning_rate": 1.9790154271329105e-05, + "loss": 1.9121, + "step": 311 + }, + { + "epoch": 0.34, + "learning_rate": 1.978771580649319e-05, + "loss": 1.9557, + "step": 312 + }, + { + "epoch": 0.34, + "learning_rate": 1.978526340754433e-05, + "loss": 1.9909, + "step": 313 + }, + { + "epoch": 0.34, + "learning_rate": 1.978279707797384e-05, + "loss": 1.7941, + "step": 314 + }, + { + "epoch": 0.34, + "learning_rate": 1.978031682129287e-05, + "loss": 1.8897, + "step": 315 + }, + { + "epoch": 0.34, + "learning_rate": 1.977782264103239e-05, + "loss": 1.9731, + "step": 316 + }, + { + "epoch": 0.34, + "learning_rate": 1.97753145407432e-05, + "loss": 2.0019, + "step": 317 + }, + { + "epoch": 0.34, + "learning_rate": 1.9772792523995912e-05, + "loss": 2.01, + "step": 318 + }, + { + "epoch": 0.35, + "learning_rate": 1.9770256594380955e-05, + "loss": 1.9104, + "step": 319 + }, + { + "epoch": 0.35, + "learning_rate": 1.976770675550856e-05, + "loss": 1.9926, + "step": 320 + }, + { + "epoch": 0.35, + "learning_rate": 1.9765143011008758e-05, + "loss": 2.1075, + "step": 321 + }, + { + "epoch": 0.35, + "learning_rate": 1.9762565364531383e-05, + "loss": 1.916, + "step": 322 + }, + { + "epoch": 0.35, + "learning_rate": 1.9759973819746055e-05, + "loss": 1.7118, + "step": 323 + }, + { + "epoch": 0.35, + "learning_rate": 1.9757368380342185e-05, + "loss": 2.1362, + "step": 324 + }, + { + "epoch": 0.35, + "learning_rate": 1.975474905002896e-05, + "loss": 1.8019, + "step": 325 + }, + { + "epoch": 0.35, + "learning_rate": 1.9752115832535343e-05, + "loss": 1.8218, + "step": 326 + }, + { + "epoch": 0.35, + "learning_rate": 1.9749468731610072e-05, + "loss": 1.7612, + "step": 327 + }, + { + "epoch": 0.35, + "learning_rate": 1.9746807751021645e-05, + "loss": 1.9373, + "step": 328 + }, + { + "epoch": 0.36, + "learning_rate": 1.974413289455832e-05, + "loss": 1.9847, + "step": 329 + }, + { + "epoch": 0.36, + "learning_rate": 1.974144416602811e-05, + "loss": 1.7258, + "step": 330 + }, + { + "epoch": 0.36, + "learning_rate": 1.9738741569258782e-05, + "loss": 1.7487, + "step": 331 + }, + { + "epoch": 0.36, + "learning_rate": 1.9736025108097834e-05, + "loss": 1.9034, + "step": 332 + }, + { + "epoch": 0.36, + "learning_rate": 1.9733294786412513e-05, + "loss": 1.7912, + "step": 333 + }, + { + "epoch": 0.36, + "learning_rate": 1.9730550608089795e-05, + "loss": 1.8738, + "step": 334 + }, + { + "epoch": 0.36, + "learning_rate": 1.9727792577036383e-05, + "loss": 1.8648, + "step": 335 + }, + { + "epoch": 0.36, + "learning_rate": 1.9725020697178697e-05, + "loss": 1.9759, + "step": 336 + }, + { + "epoch": 0.36, + "learning_rate": 1.972223497246288e-05, + "loss": 1.9795, + "step": 337 + }, + { + "epoch": 0.37, + "learning_rate": 1.9719435406854775e-05, + "loss": 1.9294, + "step": 338 + }, + { + "epoch": 0.37, + "learning_rate": 1.9716622004339943e-05, + "loss": 2.0429, + "step": 339 + }, + { + "epoch": 0.37, + "learning_rate": 1.971379476892363e-05, + "loss": 2.1394, + "step": 340 + }, + { + "epoch": 0.37, + "learning_rate": 1.9710953704630784e-05, + "loss": 1.9411, + "step": 341 + }, + { + "epoch": 0.37, + "learning_rate": 1.9708098815506035e-05, + "loss": 1.8354, + "step": 342 + }, + { + "epoch": 0.37, + "learning_rate": 1.97052301056137e-05, + "loss": 1.9368, + "step": 343 + }, + { + "epoch": 0.37, + "learning_rate": 1.9702347579037765e-05, + "loss": 1.6728, + "step": 344 + }, + { + "epoch": 0.37, + "learning_rate": 1.969945123988189e-05, + "loss": 1.789, + "step": 345 + }, + { + "epoch": 0.37, + "learning_rate": 1.9696541092269402e-05, + "loss": 1.7972, + "step": 346 + }, + { + "epoch": 0.38, + "learning_rate": 1.9693617140343277e-05, + "loss": 2.2036, + "step": 347 + }, + { + "epoch": 0.38, + "learning_rate": 1.969067938826615e-05, + "loss": 1.796, + "step": 348 + }, + { + "epoch": 0.38, + "learning_rate": 1.9687727840220304e-05, + "loss": 1.8355, + "step": 349 + }, + { + "epoch": 0.38, + "learning_rate": 1.9684762500407662e-05, + "loss": 1.8797, + "step": 350 + }, + { + "epoch": 0.38, + "learning_rate": 1.9681783373049768e-05, + "loss": 1.8698, + "step": 351 + }, + { + "epoch": 0.38, + "learning_rate": 1.9678790462387816e-05, + "loss": 1.9311, + "step": 352 + }, + { + "epoch": 0.38, + "learning_rate": 1.967578377268261e-05, + "loss": 1.6739, + "step": 353 + }, + { + "epoch": 0.38, + "learning_rate": 1.9672763308214566e-05, + "loss": 1.9781, + "step": 354 + }, + { + "epoch": 0.38, + "learning_rate": 1.9669729073283724e-05, + "loss": 1.9173, + "step": 355 + }, + { + "epoch": 0.39, + "learning_rate": 1.9666681072209712e-05, + "loss": 1.7903, + "step": 356 + }, + { + "epoch": 0.39, + "learning_rate": 1.966361930933177e-05, + "loss": 1.6572, + "step": 357 + }, + { + "epoch": 0.39, + "learning_rate": 1.9660543789008724e-05, + "loss": 1.893, + "step": 358 + }, + { + "epoch": 0.39, + "learning_rate": 1.9657454515618984e-05, + "loss": 1.6262, + "step": 359 + }, + { + "epoch": 0.39, + "learning_rate": 1.965435149356054e-05, + "loss": 1.8287, + "step": 360 + }, + { + "epoch": 0.39, + "learning_rate": 1.9651234727250958e-05, + "loss": 1.924, + "step": 361 + }, + { + "epoch": 0.39, + "learning_rate": 1.9648104221127363e-05, + "loss": 1.9815, + "step": 362 + }, + { + "epoch": 0.39, + "learning_rate": 1.9644959979646455e-05, + "loss": 1.9731, + "step": 363 + }, + { + "epoch": 0.39, + "learning_rate": 1.964180200728447e-05, + "loss": 1.613, + "step": 364 + }, + { + "epoch": 0.4, + "learning_rate": 1.9638630308537213e-05, + "loss": 1.6979, + "step": 365 + }, + { + "epoch": 0.4, + "learning_rate": 1.9635444887920006e-05, + "loss": 1.7188, + "step": 366 + }, + { + "epoch": 0.4, + "learning_rate": 1.9632245749967723e-05, + "loss": 1.9756, + "step": 367 + }, + { + "epoch": 0.4, + "learning_rate": 1.9629032899234763e-05, + "loss": 2.0186, + "step": 368 + }, + { + "epoch": 0.4, + "learning_rate": 1.9625806340295047e-05, + "loss": 1.952, + "step": 369 + }, + { + "epoch": 0.4, + "learning_rate": 1.9622566077742e-05, + "loss": 1.6013, + "step": 370 + }, + { + "epoch": 0.4, + "learning_rate": 1.961931211618858e-05, + "loss": 1.8473, + "step": 371 + }, + { + "epoch": 0.4, + "learning_rate": 1.9616044460267224e-05, + "loss": 1.9991, + "step": 372 + }, + { + "epoch": 0.4, + "learning_rate": 1.9612763114629875e-05, + "loss": 1.6916, + "step": 373 + }, + { + "epoch": 0.4, + "learning_rate": 1.960946808394797e-05, + "loss": 2.0116, + "step": 374 + }, + { + "epoch": 0.41, + "learning_rate": 1.9606159372912415e-05, + "loss": 2.0294, + "step": 375 + }, + { + "epoch": 0.41, + "learning_rate": 1.9602836986233603e-05, + "loss": 1.8217, + "step": 376 + }, + { + "epoch": 0.41, + "learning_rate": 1.959950092864139e-05, + "loss": 1.8566, + "step": 377 + }, + { + "epoch": 0.41, + "learning_rate": 1.9596151204885103e-05, + "loss": 1.8843, + "step": 378 + }, + { + "epoch": 0.41, + "learning_rate": 1.9592787819733513e-05, + "loss": 1.9598, + "step": 379 + }, + { + "epoch": 0.41, + "learning_rate": 1.9589410777974852e-05, + "loss": 1.9737, + "step": 380 + }, + { + "epoch": 0.41, + "learning_rate": 1.9586020084416777e-05, + "loss": 1.9526, + "step": 381 + }, + { + "epoch": 0.41, + "learning_rate": 1.9582615743886397e-05, + "loss": 1.9623, + "step": 382 + }, + { + "epoch": 0.41, + "learning_rate": 1.9579197761230243e-05, + "loss": 1.9201, + "step": 383 + }, + { + "epoch": 0.42, + "learning_rate": 1.9575766141314264e-05, + "loss": 1.8925, + "step": 384 + }, + { + "epoch": 0.42, + "learning_rate": 1.957232088902383e-05, + "loss": 2.0269, + "step": 385 + }, + { + "epoch": 0.42, + "learning_rate": 1.9568862009263712e-05, + "loss": 1.9761, + "step": 386 + }, + { + "epoch": 0.42, + "learning_rate": 1.9565389506958086e-05, + "loss": 1.7009, + "step": 387 + }, + { + "epoch": 0.42, + "learning_rate": 1.956190338705052e-05, + "loss": 1.8927, + "step": 388 + }, + { + "epoch": 0.42, + "learning_rate": 1.955840365450397e-05, + "loss": 1.7908, + "step": 389 + }, + { + "epoch": 0.42, + "learning_rate": 1.9554890314300768e-05, + "loss": 1.6918, + "step": 390 + }, + { + "epoch": 0.42, + "learning_rate": 1.955136337144262e-05, + "loss": 2.0751, + "step": 391 + }, + { + "epoch": 0.42, + "learning_rate": 1.9547822830950597e-05, + "loss": 1.9847, + "step": 392 + }, + { + "epoch": 0.43, + "learning_rate": 1.954426869786513e-05, + "loss": 1.8185, + "step": 393 + }, + { + "epoch": 0.43, + "learning_rate": 1.9540700977246e-05, + "loss": 1.8964, + "step": 394 + }, + { + "epoch": 0.43, + "learning_rate": 1.9537119674172332e-05, + "loss": 1.7804, + "step": 395 + }, + { + "epoch": 0.43, + "learning_rate": 1.953352479374258e-05, + "loss": 2.0158, + "step": 396 + }, + { + "epoch": 0.43, + "learning_rate": 1.9529916341074538e-05, + "loss": 1.7388, + "step": 397 + }, + { + "epoch": 0.43, + "learning_rate": 1.9526294321305323e-05, + "loss": 1.8575, + "step": 398 + }, + { + "epoch": 0.43, + "learning_rate": 1.9522658739591348e-05, + "loss": 1.8096, + "step": 399 + }, + { + "epoch": 0.43, + "learning_rate": 1.9519009601108358e-05, + "loss": 1.7633, + "step": 400 + }, + { + "epoch": 0.43, + "learning_rate": 1.951534691105138e-05, + "loss": 1.6767, + "step": 401 + }, + { + "epoch": 0.44, + "learning_rate": 1.9511670674634745e-05, + "loss": 1.7279, + "step": 402 + }, + { + "epoch": 0.44, + "learning_rate": 1.9507980897092055e-05, + "loss": 1.7844, + "step": 403 + }, + { + "epoch": 0.44, + "learning_rate": 1.9504277583676204e-05, + "loss": 1.8847, + "step": 404 + }, + { + "epoch": 0.44, + "learning_rate": 1.950056073965935e-05, + "loss": 1.7377, + "step": 405 + }, + { + "epoch": 0.44, + "learning_rate": 1.9496830370332916e-05, + "loss": 1.7804, + "step": 406 + }, + { + "epoch": 0.44, + "learning_rate": 1.949308648100757e-05, + "loss": 1.6626, + "step": 407 + }, + { + "epoch": 0.44, + "learning_rate": 1.9489329077013244e-05, + "loss": 1.8978, + "step": 408 + }, + { + "epoch": 0.44, + "learning_rate": 1.94855581636991e-05, + "loss": 2.005, + "step": 409 + }, + { + "epoch": 0.44, + "learning_rate": 1.948177374643353e-05, + "loss": 1.8455, + "step": 410 + }, + { + "epoch": 0.44, + "learning_rate": 1.9477975830604158e-05, + "loss": 1.7812, + "step": 411 + }, + { + "epoch": 0.45, + "learning_rate": 1.947416442161782e-05, + "loss": 1.7934, + "step": 412 + }, + { + "epoch": 0.45, + "learning_rate": 1.947033952490056e-05, + "loss": 2.0325, + "step": 413 + }, + { + "epoch": 0.45, + "learning_rate": 1.946650114589763e-05, + "loss": 1.7311, + "step": 414 + }, + { + "epoch": 0.45, + "learning_rate": 1.9462649290073473e-05, + "loss": 1.7788, + "step": 415 + }, + { + "epoch": 0.45, + "learning_rate": 1.9458783962911717e-05, + "loss": 2.0093, + "step": 416 + }, + { + "epoch": 0.45, + "learning_rate": 1.9454905169915164e-05, + "loss": 1.8025, + "step": 417 + }, + { + "epoch": 0.45, + "learning_rate": 1.945101291660579e-05, + "loss": 1.7943, + "step": 418 + }, + { + "epoch": 0.45, + "learning_rate": 1.9447107208524744e-05, + "loss": 2.1016, + "step": 419 + }, + { + "epoch": 0.45, + "learning_rate": 1.944318805123231e-05, + "loss": 1.917, + "step": 420 + }, + { + "epoch": 0.46, + "learning_rate": 1.943925545030794e-05, + "loss": 1.8536, + "step": 421 + }, + { + "epoch": 0.46, + "learning_rate": 1.9435309411350195e-05, + "loss": 1.9451, + "step": 422 + }, + { + "epoch": 0.46, + "learning_rate": 1.9431349939976804e-05, + "loss": 1.8264, + "step": 423 + }, + { + "epoch": 0.46, + "learning_rate": 1.942737704182459e-05, + "loss": 1.895, + "step": 424 + }, + { + "epoch": 0.46, + "learning_rate": 1.9423390722549506e-05, + "loss": 2.0386, + "step": 425 + }, + { + "epoch": 0.46, + "learning_rate": 1.94193909878266e-05, + "loss": 1.8459, + "step": 426 + }, + { + "epoch": 0.46, + "learning_rate": 1.9415377843350032e-05, + "loss": 1.6088, + "step": 427 + }, + { + "epoch": 0.46, + "learning_rate": 1.9411351294833047e-05, + "loss": 1.9419, + "step": 428 + }, + { + "epoch": 0.46, + "learning_rate": 1.9407311348007965e-05, + "loss": 1.5992, + "step": 429 + }, + { + "epoch": 0.47, + "learning_rate": 1.940325800862619e-05, + "loss": 1.8278, + "step": 430 + }, + { + "epoch": 0.47, + "learning_rate": 1.939919128245819e-05, + "loss": 1.8029, + "step": 431 + }, + { + "epoch": 0.47, + "learning_rate": 1.9395111175293485e-05, + "loss": 1.861, + "step": 432 + }, + { + "epoch": 0.47, + "learning_rate": 1.939101769294066e-05, + "loss": 1.8148, + "step": 433 + }, + { + "epoch": 0.47, + "learning_rate": 1.9386910841227315e-05, + "loss": 1.7924, + "step": 434 + }, + { + "epoch": 0.47, + "learning_rate": 1.9382790626000112e-05, + "loss": 1.829, + "step": 435 + }, + { + "epoch": 0.47, + "learning_rate": 1.9378657053124718e-05, + "loss": 1.685, + "step": 436 + }, + { + "epoch": 0.47, + "learning_rate": 1.9374510128485824e-05, + "loss": 2.1904, + "step": 437 + }, + { + "epoch": 0.47, + "learning_rate": 1.9370349857987127e-05, + "loss": 1.8356, + "step": 438 + }, + { + "epoch": 0.48, + "learning_rate": 1.9366176247551327e-05, + "loss": 1.9149, + "step": 439 + }, + { + "epoch": 0.48, + "learning_rate": 1.936198930312011e-05, + "loss": 1.6392, + "step": 440 + }, + { + "epoch": 0.48, + "learning_rate": 1.9357789030654146e-05, + "loss": 1.7937, + "step": 441 + }, + { + "epoch": 0.48, + "learning_rate": 1.9353575436133086e-05, + "loss": 1.8761, + "step": 442 + }, + { + "epoch": 0.48, + "learning_rate": 1.9349348525555537e-05, + "loss": 1.9128, + "step": 443 + }, + { + "epoch": 0.48, + "learning_rate": 1.9345108304939065e-05, + "loss": 1.9687, + "step": 444 + }, + { + "epoch": 0.48, + "learning_rate": 1.9340854780320196e-05, + "loss": 1.8992, + "step": 445 + }, + { + "epoch": 0.48, + "learning_rate": 1.9336587957754382e-05, + "loss": 1.8379, + "step": 446 + }, + { + "epoch": 0.48, + "learning_rate": 1.9332307843316004e-05, + "loss": 1.7322, + "step": 447 + }, + { + "epoch": 0.48, + "learning_rate": 1.9328014443098385e-05, + "loss": 1.7952, + "step": 448 + }, + { + "epoch": 0.49, + "learning_rate": 1.932370776321374e-05, + "loss": 1.7826, + "step": 449 + }, + { + "epoch": 0.49, + "learning_rate": 1.931938780979321e-05, + "loss": 1.9815, + "step": 450 + }, + { + "epoch": 0.49, + "learning_rate": 1.9315054588986815e-05, + "loss": 1.5621, + "step": 451 + }, + { + "epoch": 0.49, + "learning_rate": 1.9310708106963473e-05, + "loss": 1.7337, + "step": 452 + }, + { + "epoch": 0.49, + "learning_rate": 1.930634836991098e-05, + "loss": 1.8992, + "step": 453 + }, + { + "epoch": 0.49, + "learning_rate": 1.9301975384035994e-05, + "loss": 1.7118, + "step": 454 + }, + { + "epoch": 0.49, + "learning_rate": 1.929758915556404e-05, + "loss": 1.6617, + "step": 455 + }, + { + "epoch": 0.49, + "learning_rate": 1.9293189690739512e-05, + "loss": 1.9588, + "step": 456 + }, + { + "epoch": 0.49, + "learning_rate": 1.9288776995825616e-05, + "loss": 1.5833, + "step": 457 + }, + { + "epoch": 0.5, + "learning_rate": 1.9284351077104414e-05, + "loss": 1.7649, + "step": 458 + }, + { + "epoch": 0.5, + "learning_rate": 1.9279911940876793e-05, + "loss": 1.714, + "step": 459 + }, + { + "epoch": 0.5, + "learning_rate": 1.927545959346245e-05, + "loss": 1.9171, + "step": 460 + }, + { + "epoch": 0.5, + "learning_rate": 1.927099404119989e-05, + "loss": 1.6816, + "step": 461 + }, + { + "epoch": 0.5, + "learning_rate": 1.9266515290446422e-05, + "loss": 1.6948, + "step": 462 + }, + { + "epoch": 0.5, + "learning_rate": 1.926202334757814e-05, + "loss": 1.9208, + "step": 463 + }, + { + "epoch": 0.5, + "learning_rate": 1.9257518218989925e-05, + "loss": 2.0632, + "step": 464 + }, + { + "epoch": 0.5, + "learning_rate": 1.9252999911095425e-05, + "loss": 1.9065, + "step": 465 + }, + { + "epoch": 0.5, + "learning_rate": 1.924846843032705e-05, + "loss": 1.8936, + "step": 466 + }, + { + "epoch": 0.51, + "learning_rate": 1.9243923783135963e-05, + "loss": 1.9589, + "step": 467 + }, + { + "epoch": 0.51, + "learning_rate": 1.9239365975992075e-05, + "loss": 2.0159, + "step": 468 + }, + { + "epoch": 0.51, + "learning_rate": 1.923479501538403e-05, + "loss": 1.8736, + "step": 469 + }, + { + "epoch": 0.51, + "learning_rate": 1.9230210907819194e-05, + "loss": 1.8036, + "step": 470 + }, + { + "epoch": 0.51, + "learning_rate": 1.9225613659823653e-05, + "loss": 1.8388, + "step": 471 + }, + { + "epoch": 0.51, + "learning_rate": 1.9221003277942206e-05, + "loss": 2.069, + "step": 472 + }, + { + "epoch": 0.51, + "learning_rate": 1.9216379768738338e-05, + "loss": 1.9428, + "step": 473 + }, + { + "epoch": 0.51, + "learning_rate": 1.921174313879423e-05, + "loss": 1.7716, + "step": 474 + }, + { + "epoch": 0.51, + "learning_rate": 1.9207093394710742e-05, + "loss": 1.8555, + "step": 475 + }, + { + "epoch": 0.52, + "learning_rate": 1.920243054310739e-05, + "loss": 2.1768, + "step": 476 + }, + { + "epoch": 0.52, + "learning_rate": 1.9197754590622385e-05, + "loss": 1.8459, + "step": 477 + }, + { + "epoch": 0.52, + "learning_rate": 1.9193065543912546e-05, + "loss": 1.8115, + "step": 478 + }, + { + "epoch": 0.52, + "learning_rate": 1.9188363409653363e-05, + "loss": 1.8124, + "step": 479 + }, + { + "epoch": 0.52, + "learning_rate": 1.9183648194538946e-05, + "loss": 1.7224, + "step": 480 + }, + { + "epoch": 0.52, + "learning_rate": 1.9178919905282033e-05, + "loss": 1.8167, + "step": 481 + }, + { + "epoch": 0.52, + "learning_rate": 1.9174178548613967e-05, + "loss": 1.8557, + "step": 482 + }, + { + "epoch": 0.52, + "learning_rate": 1.9169424131284698e-05, + "loss": 1.7496, + "step": 483 + }, + { + "epoch": 0.52, + "learning_rate": 1.9164656660062773e-05, + "loss": 1.8104, + "step": 484 + }, + { + "epoch": 0.52, + "learning_rate": 1.9159876141735324e-05, + "loss": 1.7362, + "step": 485 + }, + { + "epoch": 0.53, + "learning_rate": 1.9155082583108048e-05, + "loss": 2.0344, + "step": 486 + }, + { + "epoch": 0.53, + "learning_rate": 1.915027599100521e-05, + "loss": 1.7144, + "step": 487 + }, + { + "epoch": 0.53, + "learning_rate": 1.9145456372269643e-05, + "loss": 1.8579, + "step": 488 + }, + { + "epoch": 0.53, + "learning_rate": 1.91406237337627e-05, + "loss": 1.7571, + "step": 489 + }, + { + "epoch": 0.53, + "learning_rate": 1.9135778082364295e-05, + "loss": 1.5567, + "step": 490 + }, + { + "epoch": 0.53, + "learning_rate": 1.913091942497285e-05, + "loss": 1.7423, + "step": 491 + }, + { + "epoch": 0.53, + "learning_rate": 1.9126047768505307e-05, + "loss": 1.9152, + "step": 492 + }, + { + "epoch": 0.53, + "learning_rate": 1.9121163119897122e-05, + "loss": 1.7091, + "step": 493 + }, + { + "epoch": 0.53, + "learning_rate": 1.9116265486102237e-05, + "loss": 1.7242, + "step": 494 + }, + { + "epoch": 0.54, + "learning_rate": 1.9111354874093087e-05, + "loss": 1.9924, + "step": 495 + }, + { + "epoch": 0.54, + "learning_rate": 1.910643129086058e-05, + "loss": 1.8313, + "step": 496 + }, + { + "epoch": 0.54, + "learning_rate": 1.9101494743414082e-05, + "loss": 1.8238, + "step": 497 + }, + { + "epoch": 0.54, + "learning_rate": 1.9096545238781433e-05, + "loss": 1.987, + "step": 498 + }, + { + "epoch": 0.54, + "learning_rate": 1.9091582784008908e-05, + "loss": 1.8133, + "step": 499 + }, + { + "epoch": 0.54, + "learning_rate": 1.908660738616122e-05, + "loss": 1.8589, + "step": 500 + }, + { + "epoch": 0.54, + "learning_rate": 1.908161905232151e-05, + "loss": 1.8214, + "step": 501 + }, + { + "epoch": 0.54, + "learning_rate": 1.9076617789591326e-05, + "loss": 1.7285, + "step": 502 + }, + { + "epoch": 0.54, + "learning_rate": 1.9071603605090638e-05, + "loss": 1.8021, + "step": 503 + }, + { + "epoch": 0.55, + "learning_rate": 1.9066576505957796e-05, + "loss": 1.7359, + "step": 504 + }, + { + "epoch": 0.55, + "learning_rate": 1.9061536499349553e-05, + "loss": 2.1337, + "step": 505 + }, + { + "epoch": 0.55, + "learning_rate": 1.905648359244102e-05, + "loss": 1.787, + "step": 506 + }, + { + "epoch": 0.55, + "learning_rate": 1.905141779242568e-05, + "loss": 1.7364, + "step": 507 + }, + { + "epoch": 0.55, + "learning_rate": 1.904633910651538e-05, + "loss": 1.6271, + "step": 508 + }, + { + "epoch": 0.55, + "learning_rate": 1.9041247541940294e-05, + "loss": 1.9234, + "step": 509 + }, + { + "epoch": 0.55, + "learning_rate": 1.9036143105948944e-05, + "loss": 2.0125, + "step": 510 + }, + { + "epoch": 0.55, + "learning_rate": 1.9031025805808175e-05, + "loss": 2.0037, + "step": 511 + }, + { + "epoch": 0.55, + "learning_rate": 1.902589564880314e-05, + "loss": 1.4813, + "step": 512 + }, + { + "epoch": 0.56, + "learning_rate": 1.9020752642237302e-05, + "loss": 1.8722, + "step": 513 + }, + { + "epoch": 0.56, + "learning_rate": 1.9015596793432408e-05, + "loss": 1.9763, + "step": 514 + }, + { + "epoch": 0.56, + "learning_rate": 1.9010428109728497e-05, + "loss": 1.7076, + "step": 515 + }, + { + "epoch": 0.56, + "learning_rate": 1.9005246598483876e-05, + "loss": 1.7394, + "step": 516 + }, + { + "epoch": 0.56, + "learning_rate": 1.9000052267075116e-05, + "loss": 1.6068, + "step": 517 + }, + { + "epoch": 0.56, + "learning_rate": 1.8994845122897033e-05, + "loss": 1.8676, + "step": 518 + }, + { + "epoch": 0.56, + "learning_rate": 1.898962517336269e-05, + "loss": 1.6328, + "step": 519 + }, + { + "epoch": 0.56, + "learning_rate": 1.8984392425903384e-05, + "loss": 2.1272, + "step": 520 + }, + { + "epoch": 0.56, + "learning_rate": 1.8979146887968615e-05, + "loss": 1.7898, + "step": 521 + }, + { + "epoch": 0.56, + "learning_rate": 1.897388856702611e-05, + "loss": 1.8246, + "step": 522 + }, + { + "epoch": 0.57, + "learning_rate": 1.8968617470561788e-05, + "loss": 1.6391, + "step": 523 + }, + { + "epoch": 0.57, + "learning_rate": 1.896333360607975e-05, + "loss": 1.6649, + "step": 524 + }, + { + "epoch": 0.57, + "learning_rate": 1.895803698110228e-05, + "loss": 1.7837, + "step": 525 + }, + { + "epoch": 0.57, + "learning_rate": 1.895272760316983e-05, + "loss": 1.7271, + "step": 526 + }, + { + "epoch": 0.57, + "learning_rate": 1.8947405479840998e-05, + "loss": 1.6772, + "step": 527 + }, + { + "epoch": 0.57, + "learning_rate": 1.894207061869254e-05, + "loss": 2.2271, + "step": 528 + }, + { + "epoch": 0.57, + "learning_rate": 1.8936723027319334e-05, + "loss": 1.6941, + "step": 529 + }, + { + "epoch": 0.57, + "learning_rate": 1.8931362713334393e-05, + "loss": 1.844, + "step": 530 + }, + { + "epoch": 0.57, + "learning_rate": 1.8925989684368823e-05, + "loss": 1.5838, + "step": 531 + }, + { + "epoch": 0.58, + "learning_rate": 1.892060394807186e-05, + "loss": 2.196, + "step": 532 + }, + { + "epoch": 0.58, + "learning_rate": 1.8915205512110802e-05, + "loss": 1.8994, + "step": 533 + }, + { + "epoch": 0.58, + "learning_rate": 1.8909794384171048e-05, + "loss": 2.097, + "step": 534 + }, + { + "epoch": 0.58, + "learning_rate": 1.8904370571956053e-05, + "loss": 1.8567, + "step": 535 + }, + { + "epoch": 0.58, + "learning_rate": 1.889893408318733e-05, + "loss": 1.7573, + "step": 536 + }, + { + "epoch": 0.58, + "learning_rate": 1.889348492560445e-05, + "loss": 1.8545, + "step": 537 + }, + { + "epoch": 0.58, + "learning_rate": 1.8888023106965007e-05, + "loss": 1.8982, + "step": 538 + }, + { + "epoch": 0.58, + "learning_rate": 1.8882548635044625e-05, + "loss": 1.8322, + "step": 539 + }, + { + "epoch": 0.58, + "learning_rate": 1.8877061517636948e-05, + "loss": 1.7116, + "step": 540 + }, + { + "epoch": 0.59, + "learning_rate": 1.8871561762553612e-05, + "loss": 1.8873, + "step": 541 + }, + { + "epoch": 0.59, + "learning_rate": 1.8866049377624245e-05, + "loss": 1.785, + "step": 542 + }, + { + "epoch": 0.59, + "learning_rate": 1.8860524370696464e-05, + "loss": 1.9869, + "step": 543 + }, + { + "epoch": 0.59, + "learning_rate": 1.8854986749635847e-05, + "loss": 2.0405, + "step": 544 + }, + { + "epoch": 0.59, + "learning_rate": 1.884943652232593e-05, + "loss": 1.7545, + "step": 545 + }, + { + "epoch": 0.59, + "learning_rate": 1.8843873696668208e-05, + "loss": 1.9384, + "step": 546 + }, + { + "epoch": 0.59, + "learning_rate": 1.8838298280582097e-05, + "loss": 1.7382, + "step": 547 + }, + { + "epoch": 0.59, + "learning_rate": 1.8832710282004936e-05, + "loss": 1.9079, + "step": 548 + }, + { + "epoch": 0.59, + "learning_rate": 1.882710970889199e-05, + "loss": 1.7966, + "step": 549 + }, + { + "epoch": 0.6, + "learning_rate": 1.882149656921642e-05, + "loss": 1.7548, + "step": 550 + }, + { + "epoch": 0.6, + "learning_rate": 1.8815870870969267e-05, + "loss": 2.0582, + "step": 551 + }, + { + "epoch": 0.6, + "learning_rate": 1.8810232622159465e-05, + "loss": 1.9251, + "step": 552 + }, + { + "epoch": 0.6, + "learning_rate": 1.8804581830813812e-05, + "loss": 1.7577, + "step": 553 + }, + { + "epoch": 0.6, + "learning_rate": 1.8798918504976953e-05, + "loss": 1.8336, + "step": 554 + }, + { + "epoch": 0.6, + "learning_rate": 1.8793242652711388e-05, + "loss": 2.0418, + "step": 555 + }, + { + "epoch": 0.6, + "learning_rate": 1.8787554282097452e-05, + "loss": 1.7, + "step": 556 + }, + { + "epoch": 0.6, + "learning_rate": 1.8781853401233285e-05, + "loss": 1.9158, + "step": 557 + }, + { + "epoch": 0.6, + "learning_rate": 1.8776140018234855e-05, + "loss": 1.5324, + "step": 558 + }, + { + "epoch": 0.6, + "learning_rate": 1.8770414141235918e-05, + "loss": 1.934, + "step": 559 + }, + { + "epoch": 0.61, + "learning_rate": 1.8764675778388023e-05, + "loss": 1.9987, + "step": 560 + }, + { + "epoch": 0.61, + "learning_rate": 1.8758924937860487e-05, + "loss": 1.7242, + "step": 561 + }, + { + "epoch": 0.61, + "learning_rate": 1.87531616278404e-05, + "loss": 1.7915, + "step": 562 + }, + { + "epoch": 0.61, + "learning_rate": 1.8747385856532596e-05, + "loss": 2.0531, + "step": 563 + }, + { + "epoch": 0.61, + "learning_rate": 1.874159763215965e-05, + "loss": 2.0786, + "step": 564 + }, + { + "epoch": 0.61, + "learning_rate": 1.8735796962961878e-05, + "loss": 1.8106, + "step": 565 + }, + { + "epoch": 0.61, + "learning_rate": 1.872998385719729e-05, + "loss": 1.8355, + "step": 566 + }, + { + "epoch": 0.61, + "learning_rate": 1.872415832314162e-05, + "loss": 1.7494, + "step": 567 + }, + { + "epoch": 0.61, + "learning_rate": 1.871832036908829e-05, + "loss": 1.9507, + "step": 568 + }, + { + "epoch": 0.62, + "learning_rate": 1.87124700033484e-05, + "loss": 1.8078, + "step": 569 + }, + { + "epoch": 0.62, + "learning_rate": 1.8706607234250723e-05, + "loss": 1.8325, + "step": 570 + }, + { + "epoch": 0.62, + "learning_rate": 1.8700732070141693e-05, + "loss": 1.8502, + "step": 571 + }, + { + "epoch": 0.62, + "learning_rate": 1.8694844519385383e-05, + "loss": 1.7116, + "step": 572 + }, + { + "epoch": 0.62, + "learning_rate": 1.86889445903635e-05, + "loss": 1.8984, + "step": 573 + }, + { + "epoch": 0.62, + "learning_rate": 1.8683032291475382e-05, + "loss": 1.8425, + "step": 574 + }, + { + "epoch": 0.62, + "learning_rate": 1.867710763113797e-05, + "loss": 1.7594, + "step": 575 + }, + { + "epoch": 0.62, + "learning_rate": 1.86711706177858e-05, + "loss": 1.6792, + "step": 576 + }, + { + "epoch": 0.62, + "learning_rate": 1.8665221259871005e-05, + "loss": 1.9078, + "step": 577 + }, + { + "epoch": 0.63, + "learning_rate": 1.865925956586328e-05, + "loss": 1.6609, + "step": 578 + }, + { + "epoch": 0.63, + "learning_rate": 1.8653285544249896e-05, + "loss": 1.7867, + "step": 579 + }, + { + "epoch": 0.63, + "learning_rate": 1.864729920353566e-05, + "loss": 1.7511, + "step": 580 + }, + { + "epoch": 0.63, + "learning_rate": 1.864130055224292e-05, + "loss": 1.9401, + "step": 581 + }, + { + "epoch": 0.63, + "learning_rate": 1.863528959891156e-05, + "loss": 2.0443, + "step": 582 + }, + { + "epoch": 0.63, + "learning_rate": 1.8629266352098964e-05, + "loss": 1.8604, + "step": 583 + }, + { + "epoch": 0.63, + "learning_rate": 1.8623230820380026e-05, + "loss": 1.6141, + "step": 584 + }, + { + "epoch": 0.63, + "learning_rate": 1.861718301234713e-05, + "loss": 2.0134, + "step": 585 + }, + { + "epoch": 0.63, + "learning_rate": 1.861112293661013e-05, + "loss": 1.5974, + "step": 586 + }, + { + "epoch": 0.64, + "learning_rate": 1.8605050601796345e-05, + "loss": 1.675, + "step": 587 + }, + { + "epoch": 0.64, + "learning_rate": 1.8598966016550556e-05, + "loss": 1.6918, + "step": 588 + }, + { + "epoch": 0.64, + "learning_rate": 1.8592869189534974e-05, + "loss": 1.7935, + "step": 589 + }, + { + "epoch": 0.64, + "learning_rate": 1.8586760129429247e-05, + "loss": 2.0052, + "step": 590 + }, + { + "epoch": 0.64, + "learning_rate": 1.8580638844930425e-05, + "loss": 1.8375, + "step": 591 + }, + { + "epoch": 0.64, + "learning_rate": 1.8574505344752977e-05, + "loss": 2.0357, + "step": 592 + }, + { + "epoch": 0.64, + "learning_rate": 1.856835963762875e-05, + "loss": 1.9813, + "step": 593 + }, + { + "epoch": 0.64, + "learning_rate": 1.8562201732306976e-05, + "loss": 1.8431, + "step": 594 + }, + { + "epoch": 0.64, + "learning_rate": 1.8556031637554254e-05, + "loss": 1.922, + "step": 595 + }, + { + "epoch": 0.65, + "learning_rate": 1.8549849362154524e-05, + "loss": 1.7421, + "step": 596 + }, + { + "epoch": 0.65, + "learning_rate": 1.8543654914909083e-05, + "loss": 1.8718, + "step": 597 + }, + { + "epoch": 0.65, + "learning_rate": 1.8537448304636546e-05, + "loss": 1.943, + "step": 598 + }, + { + "epoch": 0.65, + "learning_rate": 1.8531229540172852e-05, + "loss": 1.9991, + "step": 599 + }, + { + "epoch": 0.65, + "learning_rate": 1.852499863037123e-05, + "loss": 1.6649, + "step": 600 + }, + { + "epoch": 0.65, + "learning_rate": 1.8518755584102214e-05, + "loss": 1.8183, + "step": 601 + }, + { + "epoch": 0.65, + "learning_rate": 1.8512500410253604e-05, + "loss": 1.7537, + "step": 602 + }, + { + "epoch": 0.65, + "learning_rate": 1.8506233117730478e-05, + "loss": 1.8279, + "step": 603 + }, + { + "epoch": 0.65, + "learning_rate": 1.8499953715455148e-05, + "loss": 1.6765, + "step": 604 + }, + { + "epoch": 0.65, + "learning_rate": 1.8493662212367184e-05, + "loss": 1.9644, + "step": 605 + }, + { + "epoch": 0.66, + "learning_rate": 1.8487358617423376e-05, + "loss": 2.0155, + "step": 606 + }, + { + "epoch": 0.66, + "learning_rate": 1.8481042939597727e-05, + "loss": 1.7584, + "step": 607 + }, + { + "epoch": 0.66, + "learning_rate": 1.8474715187881444e-05, + "loss": 1.6792, + "step": 608 + }, + { + "epoch": 0.66, + "learning_rate": 1.846837537128292e-05, + "loss": 1.8875, + "step": 609 + }, + { + "epoch": 0.66, + "learning_rate": 1.8462023498827728e-05, + "loss": 1.7411, + "step": 610 + }, + { + "epoch": 0.66, + "learning_rate": 1.84556595795586e-05, + "loss": 1.4576, + "step": 611 + }, + { + "epoch": 0.66, + "learning_rate": 1.8449283622535416e-05, + "loss": 1.7889, + "step": 612 + }, + { + "epoch": 0.66, + "learning_rate": 1.8442895636835205e-05, + "loss": 1.7446, + "step": 613 + }, + { + "epoch": 0.66, + "learning_rate": 1.843649563155211e-05, + "loss": 2.0262, + "step": 614 + }, + { + "epoch": 0.67, + "learning_rate": 1.8430083615797386e-05, + "loss": 1.7133, + "step": 615 + }, + { + "epoch": 0.67, + "learning_rate": 1.842365959869939e-05, + "loss": 2.1312, + "step": 616 + }, + { + "epoch": 0.67, + "learning_rate": 1.8417223589403567e-05, + "loss": 2.0981, + "step": 617 + }, + { + "epoch": 0.67, + "learning_rate": 1.8410775597072418e-05, + "loss": 1.6955, + "step": 618 + }, + { + "epoch": 0.67, + "learning_rate": 1.8404315630885535e-05, + "loss": 1.5846, + "step": 619 + }, + { + "epoch": 0.67, + "learning_rate": 1.8397843700039523e-05, + "loss": 1.6552, + "step": 620 + }, + { + "epoch": 0.67, + "learning_rate": 1.839135981374804e-05, + "loss": 1.7879, + "step": 621 + }, + { + "epoch": 0.67, + "learning_rate": 1.838486398124176e-05, + "loss": 1.7106, + "step": 622 + }, + { + "epoch": 0.67, + "learning_rate": 1.8378356211768364e-05, + "loss": 1.8767, + "step": 623 + }, + { + "epoch": 0.68, + "learning_rate": 1.837183651459252e-05, + "loss": 2.015, + "step": 624 + }, + { + "epoch": 0.68, + "learning_rate": 1.8365304898995887e-05, + "loss": 1.5223, + "step": 625 + }, + { + "epoch": 0.68, + "learning_rate": 1.8358761374277088e-05, + "loss": 2.0143, + "step": 626 + }, + { + "epoch": 0.68, + "learning_rate": 1.8352205949751695e-05, + "loss": 1.725, + "step": 627 + }, + { + "epoch": 0.68, + "learning_rate": 1.8345638634752227e-05, + "loss": 1.841, + "step": 628 + }, + { + "epoch": 0.68, + "learning_rate": 1.8339059438628134e-05, + "loss": 1.7485, + "step": 629 + }, + { + "epoch": 0.68, + "learning_rate": 1.8332468370745766e-05, + "loss": 1.5277, + "step": 630 + }, + { + "epoch": 0.68, + "learning_rate": 1.832586544048839e-05, + "loss": 1.6906, + "step": 631 + }, + { + "epoch": 0.68, + "learning_rate": 1.831925065725615e-05, + "loss": 1.7929, + "step": 632 + }, + { + "epoch": 0.69, + "learning_rate": 1.831262403046607e-05, + "loss": 1.6509, + "step": 633 + }, + { + "epoch": 0.69, + "learning_rate": 1.8305985569552034e-05, + "loss": 2.0475, + "step": 634 + }, + { + "epoch": 0.69, + "learning_rate": 1.829933528396477e-05, + "loss": 1.9633, + "step": 635 + }, + { + "epoch": 0.69, + "learning_rate": 1.8292673183171845e-05, + "loss": 1.8058, + "step": 636 + }, + { + "epoch": 0.69, + "learning_rate": 1.8285999276657642e-05, + "loss": 1.7511, + "step": 637 + }, + { + "epoch": 0.69, + "learning_rate": 1.8279313573923354e-05, + "loss": 1.9296, + "step": 638 + }, + { + "epoch": 0.69, + "learning_rate": 1.8272616084486968e-05, + "loss": 1.8956, + "step": 639 + }, + { + "epoch": 0.69, + "learning_rate": 1.8265906817883244e-05, + "loss": 1.8134, + "step": 640 + }, + { + "epoch": 0.69, + "learning_rate": 1.825918578366372e-05, + "loss": 2.0214, + "step": 641 + }, + { + "epoch": 0.69, + "learning_rate": 1.8252452991396676e-05, + "loss": 1.7222, + "step": 642 + }, + { + "epoch": 0.7, + "learning_rate": 1.824570845066714e-05, + "loss": 1.8207, + "step": 643 + }, + { + "epoch": 0.7, + "learning_rate": 1.8238952171076862e-05, + "loss": 1.6518, + "step": 644 + }, + { + "epoch": 0.7, + "learning_rate": 1.8232184162244297e-05, + "loss": 1.5583, + "step": 645 + }, + { + "epoch": 0.7, + "learning_rate": 1.8225404433804605e-05, + "loss": 2.127, + "step": 646 + }, + { + "epoch": 0.7, + "learning_rate": 1.821861299540963e-05, + "loss": 1.561, + "step": 647 + }, + { + "epoch": 0.7, + "learning_rate": 1.8211809856727892e-05, + "loss": 1.9207, + "step": 648 + }, + { + "epoch": 0.7, + "learning_rate": 1.8204995027444554e-05, + "loss": 1.9223, + "step": 649 + }, + { + "epoch": 0.7, + "learning_rate": 1.819816851726144e-05, + "loss": 1.8233, + "step": 650 + }, + { + "epoch": 0.7, + "learning_rate": 1.8191330335896985e-05, + "loss": 1.4765, + "step": 651 + }, + { + "epoch": 0.71, + "learning_rate": 1.8184480493086248e-05, + "loss": 1.6904, + "step": 652 + }, + { + "epoch": 0.71, + "learning_rate": 1.8177618998580897e-05, + "loss": 1.9904, + "step": 653 + }, + { + "epoch": 0.71, + "learning_rate": 1.8170745862149174e-05, + "loss": 1.8617, + "step": 654 + }, + { + "epoch": 0.71, + "learning_rate": 1.8163861093575905e-05, + "loss": 2.1474, + "step": 655 + }, + { + "epoch": 0.71, + "learning_rate": 1.815696470266247e-05, + "loss": 1.967, + "step": 656 + }, + { + "epoch": 0.71, + "learning_rate": 1.8150056699226793e-05, + "loss": 1.7864, + "step": 657 + }, + { + "epoch": 0.71, + "learning_rate": 1.8143137093103345e-05, + "loss": 1.8498, + "step": 658 + }, + { + "epoch": 0.71, + "learning_rate": 1.8136205894143098e-05, + "loss": 1.8173, + "step": 659 + }, + { + "epoch": 0.71, + "learning_rate": 1.8129263112213527e-05, + "loss": 1.9093, + "step": 660 + }, + { + "epoch": 0.72, + "learning_rate": 1.8122308757198614e-05, + "loss": 1.4915, + "step": 661 + }, + { + "epoch": 0.72, + "learning_rate": 1.8115342838998807e-05, + "loss": 1.8303, + "step": 662 + }, + { + "epoch": 0.72, + "learning_rate": 1.8108365367531003e-05, + "loss": 1.7912, + "step": 663 + }, + { + "epoch": 0.72, + "learning_rate": 1.8101376352728572e-05, + "loss": 1.7086, + "step": 664 + }, + { + "epoch": 0.72, + "learning_rate": 1.80943758045413e-05, + "loss": 2.0016, + "step": 665 + }, + { + "epoch": 0.72, + "learning_rate": 1.8087363732935398e-05, + "loss": 1.7456, + "step": 666 + }, + { + "epoch": 0.72, + "learning_rate": 1.8080340147893477e-05, + "loss": 1.7095, + "step": 667 + }, + { + "epoch": 0.72, + "learning_rate": 1.807330505941455e-05, + "loss": 1.8166, + "step": 668 + }, + { + "epoch": 0.72, + "learning_rate": 1.8066258477513992e-05, + "loss": 1.9571, + "step": 669 + }, + { + "epoch": 0.73, + "learning_rate": 1.805920041222355e-05, + "loss": 1.857, + "step": 670 + }, + { + "epoch": 0.73, + "learning_rate": 1.805213087359132e-05, + "loss": 1.8744, + "step": 671 + }, + { + "epoch": 0.73, + "learning_rate": 1.8045049871681726e-05, + "loss": 1.8435, + "step": 672 + }, + { + "epoch": 0.73, + "learning_rate": 1.8037957416575514e-05, + "loss": 2.0229, + "step": 673 + }, + { + "epoch": 0.73, + "learning_rate": 1.803085351836974e-05, + "loss": 1.8561, + "step": 674 + }, + { + "epoch": 0.73, + "learning_rate": 1.802373818717774e-05, + "loss": 1.5955, + "step": 675 + }, + { + "epoch": 0.73, + "learning_rate": 1.8016611433129135e-05, + "loss": 1.6131, + "step": 676 + }, + { + "epoch": 0.73, + "learning_rate": 1.8009473266369806e-05, + "loss": 1.8665, + "step": 677 + }, + { + "epoch": 0.73, + "learning_rate": 1.8002323697061883e-05, + "loss": 1.9577, + "step": 678 + }, + { + "epoch": 0.73, + "learning_rate": 1.7995162735383725e-05, + "loss": 1.6646, + "step": 679 + }, + { + "epoch": 0.74, + "learning_rate": 1.798799039152991e-05, + "loss": 1.7852, + "step": 680 + }, + { + "epoch": 0.74, + "learning_rate": 1.7980806675711225e-05, + "loss": 1.904, + "step": 681 + }, + { + "epoch": 0.74, + "learning_rate": 1.7973611598154644e-05, + "loss": 1.7585, + "step": 682 + }, + { + "epoch": 0.74, + "learning_rate": 1.7966405169103313e-05, + "loss": 1.8896, + "step": 683 + }, + { + "epoch": 0.74, + "learning_rate": 1.795918739881654e-05, + "loss": 1.7932, + "step": 684 + }, + { + "epoch": 0.74, + "learning_rate": 1.7951958297569775e-05, + "loss": 1.887, + "step": 685 + }, + { + "epoch": 0.74, + "learning_rate": 1.7944717875654615e-05, + "loss": 1.7784, + "step": 686 + }, + { + "epoch": 0.74, + "learning_rate": 1.7937466143378754e-05, + "loss": 1.598, + "step": 687 + }, + { + "epoch": 0.74, + "learning_rate": 1.7930203111065997e-05, + "loss": 1.8335, + "step": 688 + }, + { + "epoch": 0.75, + "learning_rate": 1.7922928789056233e-05, + "loss": 1.7218, + "step": 689 + }, + { + "epoch": 0.75, + "learning_rate": 1.7915643187705428e-05, + "loss": 1.8661, + "step": 690 + }, + { + "epoch": 0.75, + "learning_rate": 1.7908346317385602e-05, + "loss": 2.0096, + "step": 691 + }, + { + "epoch": 0.75, + "learning_rate": 1.7901038188484818e-05, + "loss": 1.9253, + "step": 692 + }, + { + "epoch": 0.75, + "learning_rate": 1.789371881140717e-05, + "loss": 1.9858, + "step": 693 + }, + { + "epoch": 0.75, + "learning_rate": 1.7886388196572758e-05, + "loss": 1.6172, + "step": 694 + }, + { + "epoch": 0.75, + "learning_rate": 1.787904635441769e-05, + "loss": 2.1303, + "step": 695 + }, + { + "epoch": 0.75, + "learning_rate": 1.787169329539405e-05, + "loss": 1.9885, + "step": 696 + }, + { + "epoch": 0.75, + "learning_rate": 1.7864329029969903e-05, + "loss": 1.8695, + "step": 697 + }, + { + "epoch": 0.76, + "learning_rate": 1.7856953568629243e-05, + "loss": 1.816, + "step": 698 + }, + { + "epoch": 0.76, + "learning_rate": 1.784956692187203e-05, + "loss": 1.7575, + "step": 699 + }, + { + "epoch": 0.76, + "learning_rate": 1.7842169100214137e-05, + "loss": 1.9882, + "step": 700 + }, + { + "epoch": 0.76, + "learning_rate": 1.7834760114187334e-05, + "loss": 1.8572, + "step": 701 + }, + { + "epoch": 0.76, + "learning_rate": 1.782733997433931e-05, + "loss": 2.1264, + "step": 702 + }, + { + "epoch": 0.76, + "learning_rate": 1.781990869123361e-05, + "loss": 1.7489, + "step": 703 + }, + { + "epoch": 0.76, + "learning_rate": 1.7812466275449656e-05, + "loss": 1.9035, + "step": 704 + }, + { + "epoch": 0.76, + "learning_rate": 1.7805012737582713e-05, + "loss": 2.0032, + "step": 705 + }, + { + "epoch": 0.76, + "learning_rate": 1.779754808824388e-05, + "loss": 1.7685, + "step": 706 + }, + { + "epoch": 0.77, + "learning_rate": 1.7790072338060086e-05, + "loss": 2.076, + "step": 707 + }, + { + "epoch": 0.77, + "learning_rate": 1.7782585497674043e-05, + "loss": 2.145, + "step": 708 + }, + { + "epoch": 0.77, + "learning_rate": 1.7775087577744266e-05, + "loss": 1.8889, + "step": 709 + }, + { + "epoch": 0.77, + "learning_rate": 1.776757858894504e-05, + "loss": 1.7354, + "step": 710 + }, + { + "epoch": 0.77, + "learning_rate": 1.7760058541966406e-05, + "loss": 1.8177, + "step": 711 + }, + { + "epoch": 0.77, + "learning_rate": 1.7752527447514154e-05, + "loss": 1.8914, + "step": 712 + }, + { + "epoch": 0.77, + "learning_rate": 1.7744985316309793e-05, + "loss": 1.5651, + "step": 713 + }, + { + "epoch": 0.77, + "learning_rate": 1.773743215909055e-05, + "loss": 1.8058, + "step": 714 + }, + { + "epoch": 0.77, + "learning_rate": 1.7729867986609338e-05, + "loss": 2.0572, + "step": 715 + }, + { + "epoch": 0.77, + "learning_rate": 1.772229280963478e-05, + "loss": 1.8813, + "step": 716 + }, + { + "epoch": 0.78, + "learning_rate": 1.7714706638951126e-05, + "loss": 1.8782, + "step": 717 + }, + { + "epoch": 0.78, + "learning_rate": 1.770710948535831e-05, + "loss": 1.9337, + "step": 718 + }, + { + "epoch": 0.78, + "learning_rate": 1.769950135967188e-05, + "loss": 1.9506, + "step": 719 + }, + { + "epoch": 0.78, + "learning_rate": 1.7691882272723023e-05, + "loss": 1.8569, + "step": 720 + }, + { + "epoch": 0.78, + "learning_rate": 1.7684252235358514e-05, + "loss": 2.0687, + "step": 721 + }, + { + "epoch": 0.78, + "learning_rate": 1.7676611258440726e-05, + "loss": 2.0012, + "step": 722 + }, + { + "epoch": 0.78, + "learning_rate": 1.76689593528476e-05, + "loss": 1.6913, + "step": 723 + }, + { + "epoch": 0.78, + "learning_rate": 1.766129652947265e-05, + "loss": 1.7295, + "step": 724 + }, + { + "epoch": 0.78, + "learning_rate": 1.7653622799224914e-05, + "loss": 1.4796, + "step": 725 + }, + { + "epoch": 0.79, + "learning_rate": 1.7645938173028964e-05, + "loss": 1.8595, + "step": 726 + }, + { + "epoch": 0.79, + "learning_rate": 1.7638242661824892e-05, + "loss": 1.7979, + "step": 727 + }, + { + "epoch": 0.79, + "learning_rate": 1.7630536276568277e-05, + "loss": 2.0496, + "step": 728 + }, + { + "epoch": 0.79, + "learning_rate": 1.762281902823018e-05, + "loss": 1.7305, + "step": 729 + }, + { + "epoch": 0.79, + "learning_rate": 1.7615090927797135e-05, + "loss": 1.7799, + "step": 730 + }, + { + "epoch": 0.79, + "learning_rate": 1.760735198627111e-05, + "loss": 1.8931, + "step": 731 + }, + { + "epoch": 0.79, + "learning_rate": 1.7599602214669522e-05, + "loss": 1.9162, + "step": 732 + }, + { + "epoch": 0.79, + "learning_rate": 1.75918416240252e-05, + "loss": 1.8491, + "step": 733 + }, + { + "epoch": 0.79, + "learning_rate": 1.7584070225386368e-05, + "loss": 1.9094, + "step": 734 + }, + { + "epoch": 0.8, + "learning_rate": 1.7576288029816654e-05, + "loss": 1.6776, + "step": 735 + }, + { + "epoch": 0.8, + "learning_rate": 1.756849504839504e-05, + "loss": 1.7776, + "step": 736 + }, + { + "epoch": 0.8, + "learning_rate": 1.7560691292215872e-05, + "loss": 1.9614, + "step": 737 + }, + { + "epoch": 0.8, + "learning_rate": 1.7552876772388833e-05, + "loss": 1.9188, + "step": 738 + }, + { + "epoch": 0.8, + "learning_rate": 1.7545051500038926e-05, + "loss": 1.6998, + "step": 739 + }, + { + "epoch": 0.8, + "learning_rate": 1.753721548630647e-05, + "loss": 1.7554, + "step": 740 + }, + { + "epoch": 0.8, + "learning_rate": 1.7529368742347066e-05, + "loss": 1.7072, + "step": 741 + }, + { + "epoch": 0.8, + "learning_rate": 1.75215112793316e-05, + "loss": 1.6595, + "step": 742 + }, + { + "epoch": 0.8, + "learning_rate": 1.7513643108446213e-05, + "loss": 1.6982, + "step": 743 + }, + { + "epoch": 0.81, + "learning_rate": 1.750576424089229e-05, + "loss": 1.5064, + "step": 744 + }, + { + "epoch": 0.81, + "learning_rate": 1.7497874687886447e-05, + "loss": 1.7888, + "step": 745 + }, + { + "epoch": 0.81, + "learning_rate": 1.7489974460660507e-05, + "loss": 1.6298, + "step": 746 + }, + { + "epoch": 0.81, + "learning_rate": 1.7482063570461493e-05, + "loss": 1.9298, + "step": 747 + }, + { + "epoch": 0.81, + "learning_rate": 1.747414202855161e-05, + "loss": 1.8131, + "step": 748 + }, + { + "epoch": 0.81, + "learning_rate": 1.7466209846208225e-05, + "loss": 1.7347, + "step": 749 + }, + { + "epoch": 0.81, + "learning_rate": 1.7458267034723846e-05, + "loss": 1.7985, + "step": 750 + }, + { + "epoch": 0.81, + "learning_rate": 1.745031360540613e-05, + "loss": 1.6756, + "step": 751 + }, + { + "epoch": 0.81, + "learning_rate": 1.744234956957783e-05, + "loss": 1.5744, + "step": 752 + }, + { + "epoch": 0.81, + "learning_rate": 1.743437493857681e-05, + "loss": 2.2723, + "step": 753 + }, + { + "epoch": 0.82, + "learning_rate": 1.7426389723756026e-05, + "loss": 1.8948, + "step": 754 + }, + { + "epoch": 0.82, + "learning_rate": 1.741839393648348e-05, + "loss": 2.0171, + "step": 755 + }, + { + "epoch": 0.82, + "learning_rate": 1.741038758814224e-05, + "loss": 2.0134, + "step": 756 + }, + { + "epoch": 0.82, + "learning_rate": 1.7402370690130406e-05, + "loss": 2.0325, + "step": 757 + }, + { + "epoch": 0.82, + "learning_rate": 1.73943432538611e-05, + "loss": 1.7718, + "step": 758 + }, + { + "epoch": 0.82, + "learning_rate": 1.7386305290762437e-05, + "loss": 1.9268, + "step": 759 + }, + { + "epoch": 0.82, + "learning_rate": 1.737825681227753e-05, + "loss": 2.1387, + "step": 760 + }, + { + "epoch": 0.82, + "learning_rate": 1.7370197829864454e-05, + "loss": 1.7365, + "step": 761 + }, + { + "epoch": 0.82, + "learning_rate": 1.7362128354996242e-05, + "loss": 1.876, + "step": 762 + }, + { + "epoch": 0.83, + "learning_rate": 1.7354048399160866e-05, + "loss": 1.9904, + "step": 763 + }, + { + "epoch": 0.83, + "learning_rate": 1.7345957973861205e-05, + "loss": 2.1336, + "step": 764 + }, + { + "epoch": 0.83, + "learning_rate": 1.7337857090615068e-05, + "loss": 1.5848, + "step": 765 + }, + { + "epoch": 0.83, + "learning_rate": 1.7329745760955122e-05, + "loss": 1.6669, + "step": 766 + }, + { + "epoch": 0.83, + "learning_rate": 1.732162399642894e-05, + "loss": 1.73, + "step": 767 + }, + { + "epoch": 0.83, + "learning_rate": 1.7313491808598914e-05, + "loss": 1.9249, + "step": 768 + }, + { + "epoch": 0.83, + "learning_rate": 1.7305349209042303e-05, + "loss": 1.6414, + "step": 769 + }, + { + "epoch": 0.83, + "learning_rate": 1.729719620935118e-05, + "loss": 1.7314, + "step": 770 + }, + { + "epoch": 0.83, + "learning_rate": 1.728903282113242e-05, + "loss": 1.7717, + "step": 771 + }, + { + "epoch": 0.84, + "learning_rate": 1.7280859056007682e-05, + "loss": 1.8558, + "step": 772 + }, + { + "epoch": 0.84, + "learning_rate": 1.7272674925613424e-05, + "loss": 2.0347, + "step": 773 + }, + { + "epoch": 0.84, + "learning_rate": 1.7264480441600823e-05, + "loss": 1.7946, + "step": 774 + }, + { + "epoch": 0.84, + "learning_rate": 1.7256275615635826e-05, + "loss": 1.5202, + "step": 775 + }, + { + "epoch": 0.84, + "learning_rate": 1.7248060459399092e-05, + "loss": 1.5847, + "step": 776 + }, + { + "epoch": 0.84, + "learning_rate": 1.7239834984585982e-05, + "loss": 1.7596, + "step": 777 + }, + { + "epoch": 0.84, + "learning_rate": 1.7231599202906553e-05, + "loss": 1.5023, + "step": 778 + }, + { + "epoch": 0.84, + "learning_rate": 1.7223353126085536e-05, + "loss": 1.6659, + "step": 779 + }, + { + "epoch": 0.84, + "learning_rate": 1.7215096765862315e-05, + "loss": 1.8749, + "step": 780 + }, + { + "epoch": 0.85, + "learning_rate": 1.720683013399091e-05, + "loss": 1.806, + "step": 781 + }, + { + "epoch": 0.85, + "learning_rate": 1.7198553242239978e-05, + "loss": 1.7427, + "step": 782 + }, + { + "epoch": 0.85, + "learning_rate": 1.7190266102392765e-05, + "loss": 1.7963, + "step": 783 + }, + { + "epoch": 0.85, + "learning_rate": 1.718196872624712e-05, + "loss": 1.8034, + "step": 784 + }, + { + "epoch": 0.85, + "learning_rate": 1.717366112561546e-05, + "loss": 1.6718, + "step": 785 + }, + { + "epoch": 0.85, + "learning_rate": 1.7165343312324755e-05, + "loss": 1.7927, + "step": 786 + }, + { + "epoch": 0.85, + "learning_rate": 1.7157015298216516e-05, + "loss": 1.7335, + "step": 787 + }, + { + "epoch": 0.85, + "learning_rate": 1.714867709514678e-05, + "loss": 1.7174, + "step": 788 + }, + { + "epoch": 0.85, + "learning_rate": 1.714032871498608e-05, + "loss": 1.9039, + "step": 789 + }, + { + "epoch": 0.85, + "learning_rate": 1.713197016961945e-05, + "loss": 1.874, + "step": 790 + }, + { + "epoch": 0.86, + "learning_rate": 1.7123601470946388e-05, + "loss": 1.7786, + "step": 791 + }, + { + "epoch": 0.86, + "learning_rate": 1.7115222630880844e-05, + "loss": 1.8011, + "step": 792 + }, + { + "epoch": 0.86, + "learning_rate": 1.7106833661351213e-05, + "loss": 2.0506, + "step": 793 + }, + { + "epoch": 0.86, + "learning_rate": 1.7098434574300307e-05, + "loss": 2.0258, + "step": 794 + }, + { + "epoch": 0.86, + "learning_rate": 1.7090025381685337e-05, + "loss": 1.628, + "step": 795 + }, + { + "epoch": 0.86, + "learning_rate": 1.708160609547791e-05, + "loss": 1.7226, + "step": 796 + }, + { + "epoch": 0.86, + "learning_rate": 1.7073176727663994e-05, + "loss": 2.1619, + "step": 797 + }, + { + "epoch": 0.86, + "learning_rate": 1.706473729024392e-05, + "loss": 1.6363, + "step": 798 + }, + { + "epoch": 0.86, + "learning_rate": 1.7056287795232338e-05, + "loss": 1.4952, + "step": 799 + }, + { + "epoch": 0.87, + "learning_rate": 1.7047828254658233e-05, + "loss": 1.7071, + "step": 800 + }, + { + "epoch": 0.87, + "learning_rate": 1.703935868056488e-05, + "loss": 1.9266, + "step": 801 + }, + { + "epoch": 0.87, + "learning_rate": 1.703087908500985e-05, + "loss": 1.6527, + "step": 802 + }, + { + "epoch": 0.87, + "learning_rate": 1.702238948006496e-05, + "loss": 1.8177, + "step": 803 + }, + { + "epoch": 0.87, + "learning_rate": 1.70138898778163e-05, + "loss": 1.8119, + "step": 804 + }, + { + "epoch": 0.87, + "learning_rate": 1.7005380290364182e-05, + "loss": 1.887, + "step": 805 + }, + { + "epoch": 0.87, + "learning_rate": 1.6996860729823127e-05, + "loss": 1.5231, + "step": 806 + }, + { + "epoch": 0.87, + "learning_rate": 1.6988331208321868e-05, + "loss": 1.8765, + "step": 807 + }, + { + "epoch": 0.87, + "learning_rate": 1.6979791738003305e-05, + "loss": 2.113, + "step": 808 + }, + { + "epoch": 0.88, + "learning_rate": 1.697124233102451e-05, + "loss": 1.7483, + "step": 809 + }, + { + "epoch": 0.88, + "learning_rate": 1.69626829995567e-05, + "loss": 1.7028, + "step": 810 + }, + { + "epoch": 0.88, + "learning_rate": 1.695411375578522e-05, + "loss": 1.8967, + "step": 811 + }, + { + "epoch": 0.88, + "learning_rate": 1.6945534611909525e-05, + "loss": 1.8347, + "step": 812 + }, + { + "epoch": 0.88, + "learning_rate": 1.6936945580143166e-05, + "loss": 1.8635, + "step": 813 + }, + { + "epoch": 0.88, + "learning_rate": 1.6928346672713768e-05, + "loss": 1.8776, + "step": 814 + }, + { + "epoch": 0.88, + "learning_rate": 1.6919737901863024e-05, + "loss": 1.6324, + "step": 815 + }, + { + "epoch": 0.88, + "learning_rate": 1.6911119279846655e-05, + "loss": 1.6854, + "step": 816 + }, + { + "epoch": 0.88, + "learning_rate": 1.6902490818934417e-05, + "loss": 1.8578, + "step": 817 + }, + { + "epoch": 0.89, + "learning_rate": 1.6893852531410066e-05, + "loss": 1.6869, + "step": 818 + }, + { + "epoch": 0.89, + "learning_rate": 1.6885204429571356e-05, + "loss": 1.7316, + "step": 819 + }, + { + "epoch": 0.89, + "learning_rate": 1.6876546525730005e-05, + "loss": 1.85, + "step": 820 + }, + { + "epoch": 0.89, + "learning_rate": 1.686787883221169e-05, + "loss": 1.6942, + "step": 821 + }, + { + "epoch": 0.89, + "learning_rate": 1.6859201361356025e-05, + "loss": 1.7333, + "step": 822 + }, + { + "epoch": 0.89, + "learning_rate": 1.685051412551654e-05, + "loss": 1.4995, + "step": 823 + }, + { + "epoch": 0.89, + "learning_rate": 1.6841817137060666e-05, + "loss": 1.7027, + "step": 824 + }, + { + "epoch": 0.89, + "learning_rate": 1.683311040836973e-05, + "loss": 2.003, + "step": 825 + }, + { + "epoch": 0.89, + "learning_rate": 1.68243939518389e-05, + "loss": 1.7645, + "step": 826 + }, + { + "epoch": 0.9, + "learning_rate": 1.6815667779877226e-05, + "loss": 1.6719, + "step": 827 + }, + { + "epoch": 0.9, + "learning_rate": 1.6806931904907562e-05, + "loss": 1.8548, + "step": 828 + }, + { + "epoch": 0.9, + "learning_rate": 1.6798186339366593e-05, + "loss": 1.7971, + "step": 829 + }, + { + "epoch": 0.9, + "learning_rate": 1.678943109570479e-05, + "loss": 1.8877, + "step": 830 + }, + { + "epoch": 0.9, + "learning_rate": 1.67806661863864e-05, + "loss": 1.7383, + "step": 831 + }, + { + "epoch": 0.9, + "learning_rate": 1.677189162388944e-05, + "loss": 1.7564, + "step": 832 + }, + { + "epoch": 0.9, + "learning_rate": 1.676310742070566e-05, + "loss": 1.6716, + "step": 833 + }, + { + "epoch": 0.9, + "learning_rate": 1.6754313589340546e-05, + "loss": 1.6561, + "step": 834 + }, + { + "epoch": 0.9, + "learning_rate": 1.674551014231328e-05, + "loss": 2.1709, + "step": 835 + }, + { + "epoch": 0.9, + "learning_rate": 1.673669709215674e-05, + "loss": 1.5265, + "step": 836 + }, + { + "epoch": 0.91, + "learning_rate": 1.6727874451417473e-05, + "loss": 1.8503, + "step": 837 + }, + { + "epoch": 0.91, + "learning_rate": 1.6719042232655677e-05, + "loss": 1.8054, + "step": 838 + }, + { + "epoch": 0.91, + "learning_rate": 1.671020044844519e-05, + "loss": 1.8034, + "step": 839 + }, + { + "epoch": 0.91, + "learning_rate": 1.6701349111373465e-05, + "loss": 1.5963, + "step": 840 + }, + { + "epoch": 0.91, + "learning_rate": 1.6692488234041556e-05, + "loss": 1.9236, + "step": 841 + }, + { + "epoch": 0.91, + "learning_rate": 1.66836178290641e-05, + "loss": 1.5941, + "step": 842 + }, + { + "epoch": 0.91, + "learning_rate": 1.667473790906929e-05, + "loss": 1.8286, + "step": 843 + }, + { + "epoch": 0.91, + "learning_rate": 1.666584848669888e-05, + "loss": 1.7725, + "step": 844 + }, + { + "epoch": 0.91, + "learning_rate": 1.6656949574608138e-05, + "loss": 1.6444, + "step": 845 + }, + { + "epoch": 0.92, + "learning_rate": 1.6648041185465846e-05, + "loss": 1.8188, + "step": 846 + }, + { + "epoch": 0.92, + "learning_rate": 1.6639123331954276e-05, + "loss": 1.7729, + "step": 847 + }, + { + "epoch": 0.92, + "learning_rate": 1.6630196026769187e-05, + "loss": 1.6913, + "step": 848 + }, + { + "epoch": 0.92, + "learning_rate": 1.662125928261977e-05, + "loss": 1.7507, + "step": 849 + }, + { + "epoch": 0.92, + "learning_rate": 1.661231311222868e-05, + "loss": 1.6705, + "step": 850 + }, + { + "epoch": 0.92, + "learning_rate": 1.6603357528331966e-05, + "loss": 1.8839, + "step": 851 + }, + { + "epoch": 0.92, + "learning_rate": 1.6594392543679098e-05, + "loss": 1.7405, + "step": 852 + }, + { + "epoch": 0.92, + "learning_rate": 1.6585418171032925e-05, + "loss": 1.8388, + "step": 853 + }, + { + "epoch": 0.92, + "learning_rate": 1.6576434423169647e-05, + "loss": 1.4925, + "step": 854 + }, + { + "epoch": 0.93, + "learning_rate": 1.6567441312878828e-05, + "loss": 1.899, + "step": 855 + }, + { + "epoch": 0.93, + "learning_rate": 1.6558438852963358e-05, + "loss": 2.1859, + "step": 856 + }, + { + "epoch": 0.93, + "learning_rate": 1.6549427056239432e-05, + "loss": 1.7347, + "step": 857 + }, + { + "epoch": 0.93, + "learning_rate": 1.6540405935536532e-05, + "loss": 1.8964, + "step": 858 + }, + { + "epoch": 0.93, + "learning_rate": 1.6531375503697433e-05, + "loss": 1.9026, + "step": 859 + }, + { + "epoch": 0.93, + "learning_rate": 1.6522335773578143e-05, + "loss": 1.7895, + "step": 860 + }, + { + "epoch": 0.93, + "learning_rate": 1.6513286758047923e-05, + "loss": 2.135, + "step": 861 + }, + { + "epoch": 0.93, + "learning_rate": 1.6504228469989248e-05, + "loss": 1.789, + "step": 862 + }, + { + "epoch": 0.93, + "learning_rate": 1.6495160922297793e-05, + "loss": 2.0962, + "step": 863 + }, + { + "epoch": 0.94, + "learning_rate": 1.6486084127882416e-05, + "loss": 1.8792, + "step": 864 + }, + { + "epoch": 0.94, + "learning_rate": 1.647699809966514e-05, + "loss": 1.6231, + "step": 865 + }, + { + "epoch": 0.94, + "learning_rate": 1.646790285058113e-05, + "loss": 1.5529, + "step": 866 + }, + { + "epoch": 0.94, + "learning_rate": 1.6458798393578684e-05, + "loss": 1.8931, + "step": 867 + }, + { + "epoch": 0.94, + "learning_rate": 1.64496847416192e-05, + "loss": 1.5178, + "step": 868 + }, + { + "epoch": 0.94, + "learning_rate": 1.644056190767718e-05, + "loss": 1.791, + "step": 869 + }, + { + "epoch": 0.94, + "learning_rate": 1.6431429904740183e-05, + "loss": 1.6673, + "step": 870 + }, + { + "epoch": 0.94, + "learning_rate": 1.6422288745808828e-05, + "loss": 1.6321, + "step": 871 + }, + { + "epoch": 0.94, + "learning_rate": 1.641313844389677e-05, + "loss": 1.8554, + "step": 872 + }, + { + "epoch": 0.94, + "learning_rate": 1.6403979012030677e-05, + "loss": 1.6654, + "step": 873 + }, + { + "epoch": 0.95, + "learning_rate": 1.6394810463250218e-05, + "loss": 1.7586, + "step": 874 + }, + { + "epoch": 0.95, + "learning_rate": 1.6385632810608035e-05, + "loss": 1.8075, + "step": 875 + }, + { + "epoch": 0.95, + "learning_rate": 1.6376446067169744e-05, + "loss": 1.7, + "step": 876 + }, + { + "epoch": 0.95, + "learning_rate": 1.6367250246013887e-05, + "loss": 1.8388, + "step": 877 + }, + { + "epoch": 0.95, + "learning_rate": 1.6358045360231936e-05, + "loss": 1.562, + "step": 878 + }, + { + "epoch": 0.95, + "learning_rate": 1.6348831422928277e-05, + "loss": 1.9606, + "step": 879 + }, + { + "epoch": 0.95, + "learning_rate": 1.6339608447220163e-05, + "loss": 1.9655, + "step": 880 + }, + { + "epoch": 0.95, + "learning_rate": 1.6330376446237726e-05, + "loss": 2.0021, + "step": 881 + }, + { + "epoch": 0.95, + "learning_rate": 1.6321135433123946e-05, + "loss": 1.9258, + "step": 882 + }, + { + "epoch": 0.96, + "learning_rate": 1.6311885421034638e-05, + "loss": 1.7109, + "step": 883 + }, + { + "epoch": 0.96, + "learning_rate": 1.6302626423138412e-05, + "loss": 2.0604, + "step": 884 + }, + { + "epoch": 0.96, + "learning_rate": 1.629335845261669e-05, + "loss": 1.7187, + "step": 885 + }, + { + "epoch": 0.96, + "learning_rate": 1.6284081522663652e-05, + "loss": 1.8782, + "step": 886 + }, + { + "epoch": 0.96, + "learning_rate": 1.6274795646486244e-05, + "loss": 1.8045, + "step": 887 + }, + { + "epoch": 0.96, + "learning_rate": 1.626550083730414e-05, + "loss": 2.041, + "step": 888 + }, + { + "epoch": 0.96, + "learning_rate": 1.6256197108349734e-05, + "loss": 1.8206, + "step": 889 + }, + { + "epoch": 0.96, + "learning_rate": 1.6246884472868128e-05, + "loss": 1.7373, + "step": 890 + }, + { + "epoch": 0.96, + "learning_rate": 1.6237562944117087e-05, + "loss": 1.6757, + "step": 891 + }, + { + "epoch": 0.97, + "learning_rate": 1.6228232535367045e-05, + "loss": 1.9031, + "step": 892 + }, + { + "epoch": 0.97, + "learning_rate": 1.621889325990108e-05, + "loss": 1.626, + "step": 893 + }, + { + "epoch": 0.97, + "learning_rate": 1.6209545131014893e-05, + "loss": 1.5336, + "step": 894 + }, + { + "epoch": 0.97, + "learning_rate": 1.6200188162016778e-05, + "loss": 1.8612, + "step": 895 + }, + { + "epoch": 0.97, + "learning_rate": 1.619082236622763e-05, + "loss": 1.8652, + "step": 896 + }, + { + "epoch": 0.97, + "learning_rate": 1.6181447756980903e-05, + "loss": 1.9462, + "step": 897 + }, + { + "epoch": 0.97, + "learning_rate": 1.6172064347622595e-05, + "loss": 1.7591, + "step": 898 + }, + { + "epoch": 0.97, + "learning_rate": 1.616267215151124e-05, + "loss": 2.0353, + "step": 899 + }, + { + "epoch": 0.97, + "learning_rate": 1.6153271182017864e-05, + "loss": 1.7371, + "step": 900 + }, + { + "epoch": 0.98, + "learning_rate": 1.614386145252601e-05, + "loss": 1.5204, + "step": 901 + }, + { + "epoch": 0.98, + "learning_rate": 1.613444297643167e-05, + "loss": 1.6634, + "step": 902 + }, + { + "epoch": 0.98, + "learning_rate": 1.6125015767143293e-05, + "loss": 1.9809, + "step": 903 + }, + { + "epoch": 0.98, + "learning_rate": 1.611557983808177e-05, + "loss": 1.9466, + "step": 904 + }, + { + "epoch": 0.98, + "learning_rate": 1.610613520268039e-05, + "loss": 1.8158, + "step": 905 + }, + { + "epoch": 0.98, + "learning_rate": 1.609668187438485e-05, + "loss": 1.6599, + "step": 906 + }, + { + "epoch": 0.98, + "learning_rate": 1.608721986665322e-05, + "loss": 1.6313, + "step": 907 + }, + { + "epoch": 0.98, + "learning_rate": 1.6077749192955922e-05, + "loss": 2.0018, + "step": 908 + }, + { + "epoch": 0.98, + "learning_rate": 1.6068269866775722e-05, + "loss": 1.5188, + "step": 909 + }, + { + "epoch": 0.98, + "learning_rate": 1.6058781901607687e-05, + "loss": 1.901, + "step": 910 + }, + { + "epoch": 0.99, + "learning_rate": 1.604928531095921e-05, + "loss": 1.7248, + "step": 911 + }, + { + "epoch": 0.99, + "learning_rate": 1.6039780108349938e-05, + "loss": 1.9463, + "step": 912 + }, + { + "epoch": 0.99, + "learning_rate": 1.603026630731179e-05, + "loss": 1.9392, + "step": 913 + }, + { + "epoch": 0.99, + "learning_rate": 1.6020743921388937e-05, + "loss": 1.7979, + "step": 914 + }, + { + "epoch": 0.99, + "learning_rate": 1.6011212964137742e-05, + "loss": 1.9517, + "step": 915 + }, + { + "epoch": 0.99, + "learning_rate": 1.60016734491268e-05, + "loss": 1.5702, + "step": 916 + }, + { + "epoch": 0.99, + "learning_rate": 1.5992125389936873e-05, + "loss": 1.775, + "step": 917 + }, + { + "epoch": 0.99, + "learning_rate": 1.598256880016089e-05, + "loss": 1.8394, + "step": 918 + }, + { + "epoch": 0.99, + "learning_rate": 1.5973003693403928e-05, + "loss": 1.7364, + "step": 919 + }, + { + "epoch": 1.0, + "learning_rate": 1.5963430083283185e-05, + "loss": 2.0016, + "step": 920 + }, + { + "epoch": 1.0, + "learning_rate": 1.5953847983427967e-05, + "loss": 1.7369, + "step": 921 + }, + { + "epoch": 1.0, + "learning_rate": 1.5944257407479665e-05, + "loss": 2.1433, + "step": 922 + }, + { + "epoch": 1.0, + "learning_rate": 1.5934658369091734e-05, + "loss": 1.8075, + "step": 923 + }, + { + "epoch": 1.0, + "learning_rate": 1.5925050881929682e-05, + "loss": 1.7377, + "step": 924 + }, + { + "epoch": 1.0, + "learning_rate": 1.5915434959671046e-05, + "loss": 1.6292, + "step": 925 + }, + { + "epoch": 1.0, + "learning_rate": 1.590581061600536e-05, + "loss": 1.5613, + "step": 926 + }, + { + "epoch": 1.0, + "learning_rate": 1.5896177864634166e-05, + "loss": 1.7109, + "step": 927 + }, + { + "epoch": 1.0, + "learning_rate": 1.5886536719270956e-05, + "loss": 1.4451, + "step": 928 + }, + { + "epoch": 1.01, + "learning_rate": 1.5876887193641183e-05, + "loss": 1.7095, + "step": 929 + }, + { + "epoch": 1.01, + "learning_rate": 1.5867229301482227e-05, + "loss": 1.8886, + "step": 930 + }, + { + "epoch": 1.01, + "learning_rate": 1.585756305654338e-05, + "loss": 1.395, + "step": 931 + }, + { + "epoch": 1.01, + "learning_rate": 1.5847888472585826e-05, + "loss": 1.7254, + "step": 932 + }, + { + "epoch": 1.01, + "learning_rate": 1.583820556338262e-05, + "loss": 1.704, + "step": 933 + }, + { + "epoch": 1.01, + "learning_rate": 1.582851434271867e-05, + "loss": 1.7093, + "step": 934 + }, + { + "epoch": 1.01, + "learning_rate": 1.5818814824390713e-05, + "loss": 1.7495, + "step": 935 + }, + { + "epoch": 1.01, + "learning_rate": 1.58091070222073e-05, + "loss": 1.9524, + "step": 936 + }, + { + "epoch": 1.01, + "learning_rate": 1.5799390949988775e-05, + "loss": 1.7421, + "step": 937 + }, + { + "epoch": 1.02, + "learning_rate": 1.578966662156726e-05, + "loss": 1.6707, + "step": 938 + }, + { + "epoch": 1.02, + "learning_rate": 1.5779934050786633e-05, + "loss": 1.5381, + "step": 939 + }, + { + "epoch": 1.02, + "learning_rate": 1.577019325150249e-05, + "loss": 1.4886, + "step": 940 + }, + { + "epoch": 1.02, + "learning_rate": 1.5760444237582157e-05, + "loss": 1.7248, + "step": 941 + }, + { + "epoch": 1.02, + "learning_rate": 1.5750687022904654e-05, + "loss": 1.4004, + "step": 942 + }, + { + "epoch": 1.02, + "learning_rate": 1.5740921621360664e-05, + "loss": 1.7162, + "step": 943 + }, + { + "epoch": 1.02, + "learning_rate": 1.5731148046852537e-05, + "loss": 1.6786, + "step": 944 + }, + { + "epoch": 1.02, + "learning_rate": 1.572136631329425e-05, + "loss": 1.442, + "step": 945 + }, + { + "epoch": 1.02, + "learning_rate": 1.5711576434611404e-05, + "loss": 1.5372, + "step": 946 + }, + { + "epoch": 1.02, + "learning_rate": 1.5701778424741188e-05, + "loss": 1.4451, + "step": 947 + }, + { + "epoch": 1.03, + "learning_rate": 1.5691972297632373e-05, + "loss": 1.5666, + "step": 948 + }, + { + "epoch": 1.03, + "learning_rate": 1.568215806724528e-05, + "loss": 1.7347, + "step": 949 + }, + { + "epoch": 1.03, + "learning_rate": 1.5672335747551772e-05, + "loss": 1.4896, + "step": 950 + }, + { + "epoch": 1.03, + "learning_rate": 1.566250535253522e-05, + "loss": 1.57, + "step": 951 + }, + { + "epoch": 1.03, + "learning_rate": 1.5652666896190498e-05, + "loss": 1.6703, + "step": 952 + }, + { + "epoch": 1.03, + "learning_rate": 1.5642820392523962e-05, + "loss": 1.4305, + "step": 953 + }, + { + "epoch": 1.03, + "learning_rate": 1.563296585555341e-05, + "loss": 1.6613, + "step": 954 + }, + { + "epoch": 1.03, + "learning_rate": 1.5623103299308082e-05, + "loss": 1.9669, + "step": 955 + }, + { + "epoch": 1.03, + "learning_rate": 1.5613232737828644e-05, + "loss": 1.7696, + "step": 956 + }, + { + "epoch": 1.04, + "learning_rate": 1.560335418516714e-05, + "loss": 1.6591, + "step": 957 + }, + { + "epoch": 1.04, + "learning_rate": 1.5593467655387012e-05, + "loss": 1.5471, + "step": 958 + }, + { + "epoch": 1.04, + "learning_rate": 1.5583573162563045e-05, + "loss": 1.3704, + "step": 959 + }, + { + "epoch": 1.04, + "learning_rate": 1.5573670720781363e-05, + "loss": 1.6398, + "step": 960 + }, + { + "epoch": 1.04, + "learning_rate": 1.55637603441394e-05, + "loss": 1.7751, + "step": 961 + }, + { + "epoch": 1.04, + "learning_rate": 1.5553842046745903e-05, + "loss": 1.4721, + "step": 962 + }, + { + "epoch": 1.04, + "learning_rate": 1.554391584272088e-05, + "loss": 1.5168, + "step": 963 + }, + { + "epoch": 1.04, + "learning_rate": 1.5533981746195598e-05, + "loss": 1.4701, + "step": 964 + }, + { + "epoch": 1.04, + "learning_rate": 1.5524039771312565e-05, + "loss": 1.5164, + "step": 965 + }, + { + "epoch": 1.05, + "learning_rate": 1.5514089932225506e-05, + "loss": 1.6643, + "step": 966 + }, + { + "epoch": 1.05, + "learning_rate": 1.5504132243099327e-05, + "loss": 1.5087, + "step": 967 + }, + { + "epoch": 1.05, + "learning_rate": 1.5494166718110137e-05, + "loss": 1.6638, + "step": 968 + }, + { + "epoch": 1.05, + "learning_rate": 1.5484193371445168e-05, + "loss": 1.4278, + "step": 969 + }, + { + "epoch": 1.05, + "learning_rate": 1.5474212217302814e-05, + "loss": 1.5084, + "step": 970 + }, + { + "epoch": 1.05, + "learning_rate": 1.5464223269892564e-05, + "loss": 1.4425, + "step": 971 + }, + { + "epoch": 1.05, + "learning_rate": 1.545422654343502e-05, + "loss": 1.7087, + "step": 972 + }, + { + "epoch": 1.05, + "learning_rate": 1.544422205216184e-05, + "loss": 1.4237, + "step": 973 + }, + { + "epoch": 1.05, + "learning_rate": 1.543420981031576e-05, + "loss": 1.7358, + "step": 974 + }, + { + "epoch": 1.06, + "learning_rate": 1.5424189832150518e-05, + "loss": 1.8258, + "step": 975 + }, + { + "epoch": 1.06, + "learning_rate": 1.5414162131930896e-05, + "loss": 1.7415, + "step": 976 + }, + { + "epoch": 1.06, + "learning_rate": 1.5404126723932648e-05, + "loss": 1.8253, + "step": 977 + }, + { + "epoch": 1.06, + "learning_rate": 1.5394083622442516e-05, + "loss": 1.4271, + "step": 978 + }, + { + "epoch": 1.06, + "learning_rate": 1.5384032841758186e-05, + "loss": 1.6272, + "step": 979 + }, + { + "epoch": 1.06, + "learning_rate": 1.537397439618828e-05, + "loss": 1.6815, + "step": 980 + }, + { + "epoch": 1.06, + "learning_rate": 1.536390830005233e-05, + "loss": 1.6434, + "step": 981 + }, + { + "epoch": 1.06, + "learning_rate": 1.5353834567680758e-05, + "loss": 1.6524, + "step": 982 + }, + { + "epoch": 1.06, + "learning_rate": 1.5343753213414862e-05, + "loss": 1.6558, + "step": 983 + }, + { + "epoch": 1.06, + "learning_rate": 1.5333664251606787e-05, + "loss": 1.5875, + "step": 984 + }, + { + "epoch": 1.07, + "learning_rate": 1.5323567696619513e-05, + "loss": 1.527, + "step": 985 + }, + { + "epoch": 1.07, + "learning_rate": 1.531346356282682e-05, + "loss": 1.5793, + "step": 986 + }, + { + "epoch": 1.07, + "learning_rate": 1.530335186461329e-05, + "loss": 1.6894, + "step": 987 + }, + { + "epoch": 1.07, + "learning_rate": 1.5293232616374267e-05, + "loss": 1.687, + "step": 988 + }, + { + "epoch": 1.07, + "learning_rate": 1.5283105832515842e-05, + "loss": 1.7821, + "step": 989 + }, + { + "epoch": 1.07, + "learning_rate": 1.5272971527454838e-05, + "loss": 1.8022, + "step": 990 + }, + { + "epoch": 1.07, + "learning_rate": 1.5262829715618782e-05, + "loss": 1.586, + "step": 991 + }, + { + "epoch": 1.07, + "learning_rate": 1.5252680411445892e-05, + "loss": 1.6101, + "step": 992 + }, + { + "epoch": 1.07, + "learning_rate": 1.5242523629385048e-05, + "loss": 1.6305, + "step": 993 + }, + { + "epoch": 1.08, + "learning_rate": 1.5232359383895779e-05, + "loss": 1.7602, + "step": 994 + }, + { + "epoch": 1.08, + "learning_rate": 1.5222187689448235e-05, + "loss": 1.3457, + "step": 995 + }, + { + "epoch": 1.08, + "learning_rate": 1.521200856052318e-05, + "loss": 1.6397, + "step": 996 + }, + { + "epoch": 1.08, + "learning_rate": 1.520182201161195e-05, + "loss": 1.5046, + "step": 997 + }, + { + "epoch": 1.08, + "learning_rate": 1.5191628057216452e-05, + "loss": 1.6075, + "step": 998 + }, + { + "epoch": 1.08, + "learning_rate": 1.5181426711849133e-05, + "loss": 1.5093, + "step": 999 + }, + { + "epoch": 1.08, + "learning_rate": 1.517121799003296e-05, + "loss": 1.7927, + "step": 1000 + }, + { + "epoch": 1.08, + "learning_rate": 1.5161001906301407e-05, + "loss": 1.7316, + "step": 1001 + }, + { + "epoch": 1.08, + "learning_rate": 1.5150778475198427e-05, + "loss": 1.7274, + "step": 1002 + }, + { + "epoch": 1.09, + "learning_rate": 1.5140547711278428e-05, + "loss": 1.2897, + "step": 1003 + }, + { + "epoch": 1.09, + "learning_rate": 1.5130309629106264e-05, + "loss": 1.5808, + "step": 1004 + }, + { + "epoch": 1.09, + "learning_rate": 1.51200642432572e-05, + "loss": 1.7323, + "step": 1005 + }, + { + "epoch": 1.09, + "learning_rate": 1.5109811568316906e-05, + "loss": 1.6818, + "step": 1006 + }, + { + "epoch": 1.09, + "learning_rate": 1.5099551618881426e-05, + "loss": 1.4173, + "step": 1007 + }, + { + "epoch": 1.09, + "learning_rate": 1.5089284409557158e-05, + "loss": 1.7259, + "step": 1008 + }, + { + "epoch": 1.09, + "learning_rate": 1.5079009954960842e-05, + "loss": 1.5218, + "step": 1009 + }, + { + "epoch": 1.09, + "learning_rate": 1.5068728269719524e-05, + "loss": 1.6281, + "step": 1010 + }, + { + "epoch": 1.09, + "learning_rate": 1.505843936847055e-05, + "loss": 1.7548, + "step": 1011 + }, + { + "epoch": 1.1, + "learning_rate": 1.5048143265861536e-05, + "loss": 1.524, + "step": 1012 + }, + { + "epoch": 1.1, + "learning_rate": 1.5037839976550352e-05, + "loss": 1.7357, + "step": 1013 + }, + { + "epoch": 1.1, + "learning_rate": 1.5027529515205097e-05, + "loss": 1.7994, + "step": 1014 + }, + { + "epoch": 1.1, + "learning_rate": 1.5017211896504082e-05, + "loss": 1.8074, + "step": 1015 + }, + { + "epoch": 1.1, + "learning_rate": 1.500688713513581e-05, + "loss": 1.5332, + "step": 1016 + }, + { + "epoch": 1.1, + "learning_rate": 1.4996555245798944e-05, + "loss": 1.673, + "step": 1017 + }, + { + "epoch": 1.1, + "learning_rate": 1.4986216243202307e-05, + "loss": 1.6054, + "step": 1018 + }, + { + "epoch": 1.1, + "learning_rate": 1.4975870142064838e-05, + "loss": 1.8379, + "step": 1019 + }, + { + "epoch": 1.1, + "learning_rate": 1.4965516957115585e-05, + "loss": 1.7481, + "step": 1020 + }, + { + "epoch": 1.1, + "learning_rate": 1.495515670309368e-05, + "loss": 1.7256, + "step": 1021 + }, + { + "epoch": 1.11, + "learning_rate": 1.4944789394748322e-05, + "loss": 1.5541, + "step": 1022 + }, + { + "epoch": 1.11, + "learning_rate": 1.4934415046838755e-05, + "loss": 1.7022, + "step": 1023 + }, + { + "epoch": 1.11, + "learning_rate": 1.4924033674134236e-05, + "loss": 1.6277, + "step": 1024 + }, + { + "epoch": 1.11, + "learning_rate": 1.4913645291414027e-05, + "loss": 1.8026, + "step": 1025 + }, + { + "epoch": 1.11, + "learning_rate": 1.490324991346737e-05, + "loss": 1.548, + "step": 1026 + }, + { + "epoch": 1.11, + "learning_rate": 1.4892847555093468e-05, + "loss": 1.7814, + "step": 1027 + }, + { + "epoch": 1.11, + "learning_rate": 1.4882438231101456e-05, + "loss": 1.3886, + "step": 1028 + }, + { + "epoch": 1.11, + "learning_rate": 1.4872021956310394e-05, + "loss": 1.503, + "step": 1029 + }, + { + "epoch": 1.11, + "learning_rate": 1.4861598745549228e-05, + "loss": 1.5944, + "step": 1030 + }, + { + "epoch": 1.12, + "learning_rate": 1.4851168613656785e-05, + "loss": 1.5893, + "step": 1031 + }, + { + "epoch": 1.12, + "learning_rate": 1.484073157548174e-05, + "loss": 1.4621, + "step": 1032 + }, + { + "epoch": 1.12, + "learning_rate": 1.4830287645882602e-05, + "loss": 1.6625, + "step": 1033 + }, + { + "epoch": 1.12, + "learning_rate": 1.481983683972769e-05, + "loss": 1.6391, + "step": 1034 + }, + { + "epoch": 1.12, + "learning_rate": 1.4809379171895122e-05, + "loss": 1.7311, + "step": 1035 + }, + { + "epoch": 1.12, + "learning_rate": 1.4798914657272771e-05, + "loss": 1.424, + "step": 1036 + }, + { + "epoch": 1.12, + "learning_rate": 1.4788443310758263e-05, + "loss": 1.5855, + "step": 1037 + }, + { + "epoch": 1.12, + "learning_rate": 1.477796514725895e-05, + "loss": 1.5577, + "step": 1038 + }, + { + "epoch": 1.12, + "learning_rate": 1.4767480181691888e-05, + "loss": 1.469, + "step": 1039 + }, + { + "epoch": 1.13, + "learning_rate": 1.475698842898382e-05, + "loss": 2.0158, + "step": 1040 + }, + { + "epoch": 1.13, + "learning_rate": 1.4746489904071148e-05, + "loss": 1.7775, + "step": 1041 + }, + { + "epoch": 1.13, + "learning_rate": 1.4735984621899917e-05, + "loss": 1.5493, + "step": 1042 + }, + { + "epoch": 1.13, + "learning_rate": 1.472547259742579e-05, + "loss": 1.7657, + "step": 1043 + }, + { + "epoch": 1.13, + "learning_rate": 1.4714953845614028e-05, + "loss": 1.7562, + "step": 1044 + }, + { + "epoch": 1.13, + "learning_rate": 1.4704428381439471e-05, + "loss": 1.4397, + "step": 1045 + }, + { + "epoch": 1.13, + "learning_rate": 1.4693896219886518e-05, + "loss": 1.5493, + "step": 1046 + }, + { + "epoch": 1.13, + "learning_rate": 1.4683357375949099e-05, + "loss": 1.7458, + "step": 1047 + }, + { + "epoch": 1.13, + "learning_rate": 1.467281186463065e-05, + "loss": 1.7092, + "step": 1048 + }, + { + "epoch": 1.14, + "learning_rate": 1.4662259700944117e-05, + "loss": 1.7902, + "step": 1049 + }, + { + "epoch": 1.14, + "learning_rate": 1.46517008999119e-05, + "loss": 1.6822, + "step": 1050 + }, + { + "epoch": 1.14, + "learning_rate": 1.4641135476565853e-05, + "loss": 1.4869, + "step": 1051 + }, + { + "epoch": 1.14, + "learning_rate": 1.4630563445947265e-05, + "loss": 1.2409, + "step": 1052 + }, + { + "epoch": 1.14, + "learning_rate": 1.4619984823106821e-05, + "loss": 1.6679, + "step": 1053 + }, + { + "epoch": 1.14, + "learning_rate": 1.4609399623104594e-05, + "loss": 1.4442, + "step": 1054 + }, + { + "epoch": 1.14, + "learning_rate": 1.4598807861010023e-05, + "loss": 1.7156, + "step": 1055 + }, + { + "epoch": 1.14, + "learning_rate": 1.4588209551901886e-05, + "loss": 1.8078, + "step": 1056 + }, + { + "epoch": 1.14, + "learning_rate": 1.4577604710868288e-05, + "loss": 1.7926, + "step": 1057 + }, + { + "epoch": 1.15, + "learning_rate": 1.4566993353006622e-05, + "loss": 1.2587, + "step": 1058 + }, + { + "epoch": 1.15, + "learning_rate": 1.4556375493423572e-05, + "loss": 1.9043, + "step": 1059 + }, + { + "epoch": 1.15, + "learning_rate": 1.4545751147235063e-05, + "loss": 1.6593, + "step": 1060 + }, + { + "epoch": 1.15, + "learning_rate": 1.4535120329566268e-05, + "loss": 1.9199, + "step": 1061 + }, + { + "epoch": 1.15, + "learning_rate": 1.4524483055551561e-05, + "loss": 1.39, + "step": 1062 + }, + { + "epoch": 1.15, + "learning_rate": 1.4513839340334523e-05, + "loss": 1.5897, + "step": 1063 + }, + { + "epoch": 1.15, + "learning_rate": 1.4503189199067891e-05, + "loss": 1.695, + "step": 1064 + }, + { + "epoch": 1.15, + "learning_rate": 1.4492532646913553e-05, + "loss": 1.5598, + "step": 1065 + }, + { + "epoch": 1.15, + "learning_rate": 1.448186969904253e-05, + "loss": 1.6194, + "step": 1066 + }, + { + "epoch": 1.15, + "learning_rate": 1.447120037063494e-05, + "loss": 1.4702, + "step": 1067 + }, + { + "epoch": 1.16, + "learning_rate": 1.446052467687999e-05, + "loss": 1.673, + "step": 1068 + }, + { + "epoch": 1.16, + "learning_rate": 1.4449842632975948e-05, + "loss": 1.6351, + "step": 1069 + }, + { + "epoch": 1.16, + "learning_rate": 1.4439154254130123e-05, + "loss": 1.6767, + "step": 1070 + }, + { + "epoch": 1.16, + "learning_rate": 1.4428459555558841e-05, + "loss": 1.518, + "step": 1071 + }, + { + "epoch": 1.16, + "learning_rate": 1.4417758552487424e-05, + "loss": 1.6418, + "step": 1072 + }, + { + "epoch": 1.16, + "learning_rate": 1.4407051260150167e-05, + "loss": 1.4267, + "step": 1073 + }, + { + "epoch": 1.16, + "learning_rate": 1.4396337693790329e-05, + "loss": 1.392, + "step": 1074 + }, + { + "epoch": 1.16, + "learning_rate": 1.4385617868660094e-05, + "loss": 1.6811, + "step": 1075 + }, + { + "epoch": 1.16, + "learning_rate": 1.4374891800020549e-05, + "loss": 1.3297, + "step": 1076 + }, + { + "epoch": 1.17, + "learning_rate": 1.4364159503141684e-05, + "loss": 1.9199, + "step": 1077 + }, + { + "epoch": 1.17, + "learning_rate": 1.4353420993302346e-05, + "loss": 1.5111, + "step": 1078 + }, + { + "epoch": 1.17, + "learning_rate": 1.4342676285790224e-05, + "loss": 1.3439, + "step": 1079 + }, + { + "epoch": 1.17, + "learning_rate": 1.4331925395901847e-05, + "loss": 1.7453, + "step": 1080 + }, + { + "epoch": 1.17, + "learning_rate": 1.4321168338942528e-05, + "loss": 1.867, + "step": 1081 + }, + { + "epoch": 1.17, + "learning_rate": 1.4310405130226365e-05, + "loss": 1.537, + "step": 1082 + }, + { + "epoch": 1.17, + "learning_rate": 1.4299635785076214e-05, + "loss": 1.4933, + "step": 1083 + }, + { + "epoch": 1.17, + "learning_rate": 1.4288860318823673e-05, + "loss": 1.7592, + "step": 1084 + }, + { + "epoch": 1.17, + "learning_rate": 1.4278078746809039e-05, + "loss": 1.4784, + "step": 1085 + }, + { + "epoch": 1.18, + "learning_rate": 1.4267291084381322e-05, + "loss": 1.5672, + "step": 1086 + }, + { + "epoch": 1.18, + "learning_rate": 1.4256497346898186e-05, + "loss": 1.5483, + "step": 1087 + }, + { + "epoch": 1.18, + "learning_rate": 1.4245697549725951e-05, + "loss": 1.753, + "step": 1088 + }, + { + "epoch": 1.18, + "learning_rate": 1.4234891708239562e-05, + "loss": 1.6669, + "step": 1089 + }, + { + "epoch": 1.18, + "learning_rate": 1.4224079837822566e-05, + "loss": 1.6799, + "step": 1090 + }, + { + "epoch": 1.18, + "learning_rate": 1.4213261953867099e-05, + "loss": 1.629, + "step": 1091 + }, + { + "epoch": 1.18, + "learning_rate": 1.4202438071773856e-05, + "loss": 1.4544, + "step": 1092 + }, + { + "epoch": 1.18, + "learning_rate": 1.4191608206952069e-05, + "loss": 1.7615, + "step": 1093 + }, + { + "epoch": 1.18, + "learning_rate": 1.4180772374819489e-05, + "loss": 1.5733, + "step": 1094 + }, + { + "epoch": 1.19, + "learning_rate": 1.416993059080236e-05, + "loss": 1.4325, + "step": 1095 + }, + { + "epoch": 1.19, + "learning_rate": 1.4159082870335402e-05, + "loss": 1.5521, + "step": 1096 + }, + { + "epoch": 1.19, + "learning_rate": 1.4148229228861782e-05, + "loss": 1.6471, + "step": 1097 + }, + { + "epoch": 1.19, + "learning_rate": 1.4137369681833106e-05, + "loss": 1.5429, + "step": 1098 + }, + { + "epoch": 1.19, + "learning_rate": 1.4126504244709377e-05, + "loss": 1.7703, + "step": 1099 + }, + { + "epoch": 1.19, + "learning_rate": 1.4115632932958992e-05, + "loss": 1.5304, + "step": 1100 + }, + { + "epoch": 1.19, + "learning_rate": 1.4104755762058701e-05, + "loss": 1.6547, + "step": 1101 + }, + { + "epoch": 1.19, + "learning_rate": 1.40938727474936e-05, + "loss": 1.7368, + "step": 1102 + }, + { + "epoch": 1.19, + "learning_rate": 1.408298390475711e-05, + "loss": 1.4461, + "step": 1103 + }, + { + "epoch": 1.19, + "learning_rate": 1.4072089249350942e-05, + "loss": 1.7066, + "step": 1104 + }, + { + "epoch": 1.2, + "learning_rate": 1.4061188796785085e-05, + "loss": 1.745, + "step": 1105 + }, + { + "epoch": 1.2, + "learning_rate": 1.4050282562577782e-05, + "loss": 1.6486, + "step": 1106 + }, + { + "epoch": 1.2, + "learning_rate": 1.4039370562255501e-05, + "loss": 1.4831, + "step": 1107 + }, + { + "epoch": 1.2, + "learning_rate": 1.4028452811352926e-05, + "loss": 1.54, + "step": 1108 + }, + { + "epoch": 1.2, + "learning_rate": 1.4017529325412926e-05, + "loss": 1.6582, + "step": 1109 + }, + { + "epoch": 1.2, + "learning_rate": 1.400660011998653e-05, + "loss": 1.7603, + "step": 1110 + }, + { + "epoch": 1.2, + "learning_rate": 1.3995665210632918e-05, + "loss": 1.6216, + "step": 1111 + }, + { + "epoch": 1.2, + "learning_rate": 1.398472461291938e-05, + "loss": 1.6027, + "step": 1112 + }, + { + "epoch": 1.2, + "learning_rate": 1.3973778342421314e-05, + "loss": 1.6572, + "step": 1113 + }, + { + "epoch": 1.21, + "learning_rate": 1.3962826414722185e-05, + "loss": 1.4402, + "step": 1114 + }, + { + "epoch": 1.21, + "learning_rate": 1.395186884541352e-05, + "loss": 1.6595, + "step": 1115 + }, + { + "epoch": 1.21, + "learning_rate": 1.3940905650094874e-05, + "loss": 1.5473, + "step": 1116 + }, + { + "epoch": 1.21, + "learning_rate": 1.3929936844373806e-05, + "loss": 1.653, + "step": 1117 + }, + { + "epoch": 1.21, + "learning_rate": 1.391896244386587e-05, + "loss": 1.4939, + "step": 1118 + }, + { + "epoch": 1.21, + "learning_rate": 1.3907982464194584e-05, + "loss": 1.4917, + "step": 1119 + }, + { + "epoch": 1.21, + "learning_rate": 1.3896996920991409e-05, + "loss": 1.3911, + "step": 1120 + }, + { + "epoch": 1.21, + "learning_rate": 1.3886005829895717e-05, + "loss": 1.6069, + "step": 1121 + }, + { + "epoch": 1.21, + "learning_rate": 1.387500920655479e-05, + "loss": 1.683, + "step": 1122 + }, + { + "epoch": 1.22, + "learning_rate": 1.3864007066623782e-05, + "loss": 1.9023, + "step": 1123 + }, + { + "epoch": 1.22, + "learning_rate": 1.3852999425765697e-05, + "loss": 1.5886, + "step": 1124 + }, + { + "epoch": 1.22, + "learning_rate": 1.384198629965137e-05, + "loss": 1.5902, + "step": 1125 + }, + { + "epoch": 1.22, + "learning_rate": 1.3830967703959458e-05, + "loss": 1.7075, + "step": 1126 + }, + { + "epoch": 1.22, + "learning_rate": 1.3819943654376393e-05, + "loss": 1.8132, + "step": 1127 + }, + { + "epoch": 1.22, + "learning_rate": 1.3808914166596367e-05, + "loss": 1.7699, + "step": 1128 + }, + { + "epoch": 1.22, + "learning_rate": 1.3797879256321323e-05, + "loss": 1.8824, + "step": 1129 + }, + { + "epoch": 1.22, + "learning_rate": 1.378683893926092e-05, + "loss": 1.7651, + "step": 1130 + }, + { + "epoch": 1.22, + "learning_rate": 1.3775793231132515e-05, + "loss": 1.7531, + "step": 1131 + }, + { + "epoch": 1.23, + "learning_rate": 1.3764742147661143e-05, + "loss": 1.5328, + "step": 1132 + }, + { + "epoch": 1.23, + "learning_rate": 1.3753685704579489e-05, + "loss": 1.5323, + "step": 1133 + }, + { + "epoch": 1.23, + "learning_rate": 1.3742623917627864e-05, + "loss": 1.6862, + "step": 1134 + }, + { + "epoch": 1.23, + "learning_rate": 1.373155680255419e-05, + "loss": 1.6841, + "step": 1135 + }, + { + "epoch": 1.23, + "learning_rate": 1.3720484375113978e-05, + "loss": 1.5371, + "step": 1136 + }, + { + "epoch": 1.23, + "learning_rate": 1.3709406651070299e-05, + "loss": 1.6985, + "step": 1137 + }, + { + "epoch": 1.23, + "learning_rate": 1.3698323646193758e-05, + "loss": 1.6268, + "step": 1138 + }, + { + "epoch": 1.23, + "learning_rate": 1.3687235376262492e-05, + "loss": 1.5333, + "step": 1139 + }, + { + "epoch": 1.23, + "learning_rate": 1.3676141857062117e-05, + "loss": 1.7055, + "step": 1140 + }, + { + "epoch": 1.23, + "learning_rate": 1.366504310438574e-05, + "loss": 1.4433, + "step": 1141 + }, + { + "epoch": 1.24, + "learning_rate": 1.3653939134033897e-05, + "loss": 1.8203, + "step": 1142 + }, + { + "epoch": 1.24, + "learning_rate": 1.3642829961814577e-05, + "loss": 1.6585, + "step": 1143 + }, + { + "epoch": 1.24, + "learning_rate": 1.3631715603543153e-05, + "loss": 1.4763, + "step": 1144 + }, + { + "epoch": 1.24, + "learning_rate": 1.362059607504239e-05, + "loss": 1.8157, + "step": 1145 + }, + { + "epoch": 1.24, + "learning_rate": 1.3609471392142419e-05, + "loss": 1.5029, + "step": 1146 + }, + { + "epoch": 1.24, + "learning_rate": 1.3598341570680698e-05, + "loss": 1.5438, + "step": 1147 + }, + { + "epoch": 1.24, + "learning_rate": 1.3587206626502004e-05, + "loss": 1.4743, + "step": 1148 + }, + { + "epoch": 1.24, + "learning_rate": 1.3576066575458415e-05, + "loss": 1.8572, + "step": 1149 + }, + { + "epoch": 1.24, + "learning_rate": 1.3564921433409268e-05, + "loss": 1.5078, + "step": 1150 + }, + { + "epoch": 1.25, + "learning_rate": 1.3553771216221155e-05, + "loss": 1.2459, + "step": 1151 + }, + { + "epoch": 1.25, + "learning_rate": 1.3542615939767882e-05, + "loss": 1.7684, + "step": 1152 + }, + { + "epoch": 1.25, + "learning_rate": 1.3531455619930481e-05, + "loss": 1.4721, + "step": 1153 + }, + { + "epoch": 1.25, + "learning_rate": 1.3520290272597135e-05, + "loss": 1.4634, + "step": 1154 + }, + { + "epoch": 1.25, + "learning_rate": 1.3509119913663206e-05, + "loss": 1.5922, + "step": 1155 + }, + { + "epoch": 1.25, + "learning_rate": 1.3497944559031185e-05, + "loss": 1.58, + "step": 1156 + }, + { + "epoch": 1.25, + "learning_rate": 1.3486764224610667e-05, + "loss": 1.8095, + "step": 1157 + }, + { + "epoch": 1.25, + "learning_rate": 1.3475578926318343e-05, + "loss": 1.6782, + "step": 1158 + }, + { + "epoch": 1.25, + "learning_rate": 1.3464388680077973e-05, + "loss": 1.4844, + "step": 1159 + }, + { + "epoch": 1.26, + "learning_rate": 1.345319350182036e-05, + "loss": 1.3102, + "step": 1160 + }, + { + "epoch": 1.26, + "learning_rate": 1.3441993407483321e-05, + "loss": 1.719, + "step": 1161 + }, + { + "epoch": 1.26, + "learning_rate": 1.343078841301168e-05, + "loss": 1.6825, + "step": 1162 + }, + { + "epoch": 1.26, + "learning_rate": 1.3419578534357236e-05, + "loss": 1.7969, + "step": 1163 + }, + { + "epoch": 1.26, + "learning_rate": 1.3408363787478736e-05, + "loss": 1.7454, + "step": 1164 + }, + { + "epoch": 1.26, + "learning_rate": 1.3397144188341865e-05, + "loss": 1.7881, + "step": 1165 + }, + { + "epoch": 1.26, + "learning_rate": 1.3385919752919208e-05, + "loss": 1.8708, + "step": 1166 + }, + { + "epoch": 1.26, + "learning_rate": 1.3374690497190244e-05, + "loss": 1.6696, + "step": 1167 + }, + { + "epoch": 1.26, + "learning_rate": 1.3363456437141305e-05, + "loss": 1.6845, + "step": 1168 + }, + { + "epoch": 1.27, + "learning_rate": 1.335221758876557e-05, + "loss": 1.7927, + "step": 1169 + }, + { + "epoch": 1.27, + "learning_rate": 1.334097396806303e-05, + "loss": 1.5664, + "step": 1170 + }, + { + "epoch": 1.27, + "learning_rate": 1.332972559104047e-05, + "loss": 1.7425, + "step": 1171 + }, + { + "epoch": 1.27, + "learning_rate": 1.3318472473711453e-05, + "loss": 1.5114, + "step": 1172 + }, + { + "epoch": 1.27, + "learning_rate": 1.3307214632096282e-05, + "loss": 1.4314, + "step": 1173 + }, + { + "epoch": 1.27, + "learning_rate": 1.329595208222199e-05, + "loss": 1.7526, + "step": 1174 + }, + { + "epoch": 1.27, + "learning_rate": 1.3284684840122313e-05, + "loss": 1.693, + "step": 1175 + }, + { + "epoch": 1.27, + "learning_rate": 1.3273412921837663e-05, + "loss": 1.6599, + "step": 1176 + }, + { + "epoch": 1.27, + "learning_rate": 1.3262136343415117e-05, + "loss": 1.7349, + "step": 1177 + }, + { + "epoch": 1.27, + "learning_rate": 1.3250855120908379e-05, + "loss": 1.6321, + "step": 1178 + }, + { + "epoch": 1.28, + "learning_rate": 1.3239569270377768e-05, + "loss": 1.6261, + "step": 1179 + }, + { + "epoch": 1.28, + "learning_rate": 1.3228278807890184e-05, + "loss": 1.5685, + "step": 1180 + }, + { + "epoch": 1.28, + "learning_rate": 1.3216983749519112e-05, + "loss": 1.6259, + "step": 1181 + }, + { + "epoch": 1.28, + "learning_rate": 1.3205684111344557e-05, + "loss": 1.7537, + "step": 1182 + }, + { + "epoch": 1.28, + "learning_rate": 1.319437990945306e-05, + "loss": 1.6813, + "step": 1183 + }, + { + "epoch": 1.28, + "learning_rate": 1.3183071159937649e-05, + "loss": 1.5848, + "step": 1184 + }, + { + "epoch": 1.28, + "learning_rate": 1.3171757878897831e-05, + "loss": 1.9219, + "step": 1185 + }, + { + "epoch": 1.28, + "learning_rate": 1.3160440082439565e-05, + "loss": 1.6489, + "step": 1186 + }, + { + "epoch": 1.28, + "learning_rate": 1.3149117786675239e-05, + "loss": 1.7062, + "step": 1187 + }, + { + "epoch": 1.29, + "learning_rate": 1.3137791007723636e-05, + "loss": 1.5602, + "step": 1188 + }, + { + "epoch": 1.29, + "learning_rate": 1.3126459761709943e-05, + "loss": 1.7758, + "step": 1189 + }, + { + "epoch": 1.29, + "learning_rate": 1.311512406476568e-05, + "loss": 1.4631, + "step": 1190 + }, + { + "epoch": 1.29, + "learning_rate": 1.3103783933028726e-05, + "loss": 1.5428, + "step": 1191 + }, + { + "epoch": 1.29, + "learning_rate": 1.309243938264326e-05, + "loss": 1.7362, + "step": 1192 + }, + { + "epoch": 1.29, + "learning_rate": 1.3081090429759755e-05, + "loss": 1.6797, + "step": 1193 + }, + { + "epoch": 1.29, + "learning_rate": 1.3069737090534951e-05, + "loss": 1.748, + "step": 1194 + }, + { + "epoch": 1.29, + "learning_rate": 1.305837938113184e-05, + "loss": 1.6353, + "step": 1195 + }, + { + "epoch": 1.29, + "learning_rate": 1.3047017317719623e-05, + "loss": 1.533, + "step": 1196 + }, + { + "epoch": 1.3, + "learning_rate": 1.303565091647371e-05, + "loss": 1.6425, + "step": 1197 + }, + { + "epoch": 1.3, + "learning_rate": 1.302428019357568e-05, + "loss": 1.7006, + "step": 1198 + }, + { + "epoch": 1.3, + "learning_rate": 1.3012905165213265e-05, + "loss": 1.7035, + "step": 1199 + }, + { + "epoch": 1.3, + "learning_rate": 1.300152584758033e-05, + "loss": 1.4196, + "step": 1200 + }, + { + "epoch": 1.3, + "learning_rate": 1.2990142256876845e-05, + "loss": 1.6645, + "step": 1201 + }, + { + "epoch": 1.3, + "learning_rate": 1.2978754409308864e-05, + "loss": 1.687, + "step": 1202 + }, + { + "epoch": 1.3, + "learning_rate": 1.29673623210885e-05, + "loss": 1.5081, + "step": 1203 + }, + { + "epoch": 1.3, + "learning_rate": 1.2955966008433898e-05, + "loss": 1.7733, + "step": 1204 + }, + { + "epoch": 1.3, + "learning_rate": 1.2944565487569224e-05, + "loss": 1.5592, + "step": 1205 + }, + { + "epoch": 1.31, + "learning_rate": 1.293316077472464e-05, + "loss": 1.541, + "step": 1206 + }, + { + "epoch": 1.31, + "learning_rate": 1.292175188613626e-05, + "loss": 1.5144, + "step": 1207 + }, + { + "epoch": 1.31, + "learning_rate": 1.2910338838046154e-05, + "loss": 1.4817, + "step": 1208 + }, + { + "epoch": 1.31, + "learning_rate": 1.2898921646702317e-05, + "loss": 1.5223, + "step": 1209 + }, + { + "epoch": 1.31, + "learning_rate": 1.2887500328358627e-05, + "loss": 1.6485, + "step": 1210 + }, + { + "epoch": 1.31, + "learning_rate": 1.2876074899274855e-05, + "loss": 1.6818, + "step": 1211 + }, + { + "epoch": 1.31, + "learning_rate": 1.2864645375716614e-05, + "loss": 1.2953, + "step": 1212 + }, + { + "epoch": 1.31, + "learning_rate": 1.2853211773955347e-05, + "loss": 1.4401, + "step": 1213 + }, + { + "epoch": 1.31, + "learning_rate": 1.2841774110268304e-05, + "loss": 1.5226, + "step": 1214 + }, + { + "epoch": 1.31, + "learning_rate": 1.2830332400938518e-05, + "loss": 1.7461, + "step": 1215 + }, + { + "epoch": 1.32, + "learning_rate": 1.2818886662254782e-05, + "loss": 1.659, + "step": 1216 + }, + { + "epoch": 1.32, + "learning_rate": 1.2807436910511626e-05, + "loss": 1.6772, + "step": 1217 + }, + { + "epoch": 1.32, + "learning_rate": 1.279598316200929e-05, + "loss": 1.7157, + "step": 1218 + }, + { + "epoch": 1.32, + "learning_rate": 1.2784525433053707e-05, + "loss": 1.5594, + "step": 1219 + }, + { + "epoch": 1.32, + "learning_rate": 1.2773063739956473e-05, + "loss": 1.4783, + "step": 1220 + }, + { + "epoch": 1.32, + "learning_rate": 1.276159809903483e-05, + "loss": 1.5933, + "step": 1221 + }, + { + "epoch": 1.32, + "learning_rate": 1.2750128526611642e-05, + "loss": 1.5352, + "step": 1222 + }, + { + "epoch": 1.32, + "learning_rate": 1.2738655039015368e-05, + "loss": 1.5875, + "step": 1223 + }, + { + "epoch": 1.32, + "learning_rate": 1.2727177652580044e-05, + "loss": 1.6181, + "step": 1224 + }, + { + "epoch": 1.33, + "learning_rate": 1.2715696383645247e-05, + "loss": 1.3225, + "step": 1225 + }, + { + "epoch": 1.33, + "learning_rate": 1.2704211248556089e-05, + "loss": 1.7181, + "step": 1226 + }, + { + "epoch": 1.33, + "learning_rate": 1.2692722263663186e-05, + "loss": 1.8566, + "step": 1227 + }, + { + "epoch": 1.33, + "learning_rate": 1.2681229445322633e-05, + "loss": 1.712, + "step": 1228 + }, + { + "epoch": 1.33, + "learning_rate": 1.2669732809895986e-05, + "loss": 1.7477, + "step": 1229 + }, + { + "epoch": 1.33, + "learning_rate": 1.265823237375023e-05, + "loss": 1.6415, + "step": 1230 + }, + { + "epoch": 1.33, + "learning_rate": 1.2646728153257761e-05, + "loss": 1.6903, + "step": 1231 + }, + { + "epoch": 1.33, + "learning_rate": 1.2635220164796364e-05, + "loss": 1.6767, + "step": 1232 + }, + { + "epoch": 1.33, + "learning_rate": 1.262370842474919e-05, + "loss": 1.557, + "step": 1233 + }, + { + "epoch": 1.34, + "learning_rate": 1.261219294950473e-05, + "loss": 1.5915, + "step": 1234 + }, + { + "epoch": 1.34, + "learning_rate": 1.2600673755456789e-05, + "loss": 1.7127, + "step": 1235 + }, + { + "epoch": 1.34, + "learning_rate": 1.2589150859004473e-05, + "loss": 1.4802, + "step": 1236 + }, + { + "epoch": 1.34, + "learning_rate": 1.2577624276552155e-05, + "loss": 1.6932, + "step": 1237 + }, + { + "epoch": 1.34, + "learning_rate": 1.2566094024509452e-05, + "loss": 1.5752, + "step": 1238 + }, + { + "epoch": 1.34, + "learning_rate": 1.2554560119291206e-05, + "loss": 1.5841, + "step": 1239 + }, + { + "epoch": 1.34, + "learning_rate": 1.2543022577317472e-05, + "loss": 1.739, + "step": 1240 + }, + { + "epoch": 1.34, + "learning_rate": 1.2531481415013464e-05, + "loss": 1.7102, + "step": 1241 + }, + { + "epoch": 1.34, + "learning_rate": 1.2519936648809564e-05, + "loss": 1.7832, + "step": 1242 + }, + { + "epoch": 1.35, + "learning_rate": 1.2508388295141276e-05, + "loss": 1.8557, + "step": 1243 + }, + { + "epoch": 1.35, + "learning_rate": 1.249683637044922e-05, + "loss": 1.3856, + "step": 1244 + }, + { + "epoch": 1.35, + "learning_rate": 1.2485280891179086e-05, + "loss": 1.6422, + "step": 1245 + }, + { + "epoch": 1.35, + "learning_rate": 1.247372187378164e-05, + "loss": 2.1887, + "step": 1246 + }, + { + "epoch": 1.35, + "learning_rate": 1.2462159334712676e-05, + "loss": 1.9006, + "step": 1247 + }, + { + "epoch": 1.35, + "learning_rate": 1.2450593290433005e-05, + "loss": 1.7601, + "step": 1248 + }, + { + "epoch": 1.35, + "learning_rate": 1.243902375740842e-05, + "loss": 1.734, + "step": 1249 + }, + { + "epoch": 1.35, + "learning_rate": 1.2427450752109696e-05, + "loss": 1.4335, + "step": 1250 + }, + { + "epoch": 1.35, + "learning_rate": 1.2415874291012538e-05, + "loss": 1.809, + "step": 1251 + }, + { + "epoch": 1.35, + "learning_rate": 1.2404294390597576e-05, + "loss": 1.406, + "step": 1252 + }, + { + "epoch": 1.36, + "learning_rate": 1.2392711067350337e-05, + "loss": 1.5234, + "step": 1253 + }, + { + "epoch": 1.36, + "learning_rate": 1.2381124337761216e-05, + "loss": 1.7456, + "step": 1254 + }, + { + "epoch": 1.36, + "learning_rate": 1.2369534218325465e-05, + "loss": 1.8913, + "step": 1255 + }, + { + "epoch": 1.36, + "learning_rate": 1.2357940725543156e-05, + "loss": 1.6218, + "step": 1256 + }, + { + "epoch": 1.36, + "learning_rate": 1.2346343875919163e-05, + "loss": 1.6535, + "step": 1257 + }, + { + "epoch": 1.36, + "learning_rate": 1.2334743685963146e-05, + "loss": 1.4896, + "step": 1258 + }, + { + "epoch": 1.36, + "learning_rate": 1.2323140172189515e-05, + "loss": 1.3186, + "step": 1259 + }, + { + "epoch": 1.36, + "learning_rate": 1.2311533351117406e-05, + "loss": 1.5025, + "step": 1260 + }, + { + "epoch": 1.36, + "learning_rate": 1.2299923239270675e-05, + "loss": 1.5083, + "step": 1261 + }, + { + "epoch": 1.37, + "learning_rate": 1.2288309853177855e-05, + "loss": 1.4921, + "step": 1262 + }, + { + "epoch": 1.37, + "learning_rate": 1.227669320937215e-05, + "loss": 1.65, + "step": 1263 + }, + { + "epoch": 1.37, + "learning_rate": 1.2265073324391388e-05, + "loss": 1.6036, + "step": 1264 + }, + { + "epoch": 1.37, + "learning_rate": 1.225345021477802e-05, + "loss": 1.4732, + "step": 1265 + }, + { + "epoch": 1.37, + "learning_rate": 1.2241823897079084e-05, + "loss": 1.6494, + "step": 1266 + }, + { + "epoch": 1.37, + "learning_rate": 1.2230194387846189e-05, + "loss": 1.7075, + "step": 1267 + }, + { + "epoch": 1.37, + "learning_rate": 1.2218561703635484e-05, + "loss": 1.7934, + "step": 1268 + }, + { + "epoch": 1.37, + "learning_rate": 1.2206925861007639e-05, + "loss": 1.818, + "step": 1269 + }, + { + "epoch": 1.37, + "learning_rate": 1.2195286876527824e-05, + "loss": 1.7087, + "step": 1270 + }, + { + "epoch": 1.38, + "learning_rate": 1.2183644766765674e-05, + "loss": 1.5402, + "step": 1271 + }, + { + "epoch": 1.38, + "learning_rate": 1.2171999548295284e-05, + "loss": 1.6554, + "step": 1272 + }, + { + "epoch": 1.38, + "learning_rate": 1.2160351237695162e-05, + "loss": 1.5047, + "step": 1273 + }, + { + "epoch": 1.38, + "learning_rate": 1.214869985154823e-05, + "loss": 1.5085, + "step": 1274 + }, + { + "epoch": 1.38, + "learning_rate": 1.213704540644178e-05, + "loss": 1.5607, + "step": 1275 + }, + { + "epoch": 1.38, + "learning_rate": 1.2125387918967461e-05, + "loss": 1.4395, + "step": 1276 + }, + { + "epoch": 1.38, + "learning_rate": 1.2113727405721261e-05, + "loss": 1.5419, + "step": 1277 + }, + { + "epoch": 1.38, + "learning_rate": 1.2102063883303462e-05, + "loss": 1.6719, + "step": 1278 + }, + { + "epoch": 1.38, + "learning_rate": 1.2090397368318634e-05, + "loss": 1.4981, + "step": 1279 + }, + { + "epoch": 1.39, + "learning_rate": 1.207872787737562e-05, + "loss": 1.7986, + "step": 1280 + }, + { + "epoch": 1.39, + "learning_rate": 1.2067055427087482e-05, + "loss": 1.6052, + "step": 1281 + }, + { + "epoch": 1.39, + "learning_rate": 1.2055380034071504e-05, + "loss": 1.4215, + "step": 1282 + }, + { + "epoch": 1.39, + "learning_rate": 1.204370171494916e-05, + "loss": 1.5639, + "step": 1283 + }, + { + "epoch": 1.39, + "learning_rate": 1.2032020486346085e-05, + "loss": 1.6687, + "step": 1284 + }, + { + "epoch": 1.39, + "learning_rate": 1.2020336364892062e-05, + "loss": 1.7765, + "step": 1285 + }, + { + "epoch": 1.39, + "learning_rate": 1.2008649367220988e-05, + "loss": 1.7461, + "step": 1286 + }, + { + "epoch": 1.39, + "learning_rate": 1.1996959509970857e-05, + "loss": 1.542, + "step": 1287 + }, + { + "epoch": 1.39, + "learning_rate": 1.198526680978373e-05, + "loss": 1.7398, + "step": 1288 + }, + { + "epoch": 1.4, + "learning_rate": 1.197357128330572e-05, + "loss": 1.5469, + "step": 1289 + }, + { + "epoch": 1.4, + "learning_rate": 1.1961872947186958e-05, + "loss": 1.5236, + "step": 1290 + }, + { + "epoch": 1.4, + "learning_rate": 1.1950171818081583e-05, + "loss": 1.5148, + "step": 1291 + }, + { + "epoch": 1.4, + "learning_rate": 1.1938467912647708e-05, + "loss": 1.7556, + "step": 1292 + }, + { + "epoch": 1.4, + "learning_rate": 1.1926761247547392e-05, + "loss": 1.5149, + "step": 1293 + }, + { + "epoch": 1.4, + "learning_rate": 1.1915051839446627e-05, + "loss": 1.6909, + "step": 1294 + }, + { + "epoch": 1.4, + "learning_rate": 1.190333970501531e-05, + "loss": 1.7696, + "step": 1295 + }, + { + "epoch": 1.4, + "learning_rate": 1.1891624860927222e-05, + "loss": 1.7218, + "step": 1296 + }, + { + "epoch": 1.4, + "learning_rate": 1.1879907323859997e-05, + "loss": 1.5859, + "step": 1297 + }, + { + "epoch": 1.4, + "learning_rate": 1.1868187110495104e-05, + "loss": 1.8859, + "step": 1298 + }, + { + "epoch": 1.41, + "learning_rate": 1.1856464237517826e-05, + "loss": 1.793, + "step": 1299 + }, + { + "epoch": 1.41, + "learning_rate": 1.1844738721617228e-05, + "loss": 1.657, + "step": 1300 + }, + { + "epoch": 1.41, + "learning_rate": 1.1833010579486135e-05, + "loss": 1.6519, + "step": 1301 + }, + { + "epoch": 1.41, + "learning_rate": 1.1821279827821118e-05, + "loss": 1.5418, + "step": 1302 + }, + { + "epoch": 1.41, + "learning_rate": 1.1809546483322458e-05, + "loss": 1.7052, + "step": 1303 + }, + { + "epoch": 1.41, + "learning_rate": 1.1797810562694127e-05, + "loss": 1.5585, + "step": 1304 + }, + { + "epoch": 1.41, + "learning_rate": 1.1786072082643774e-05, + "loss": 1.7492, + "step": 1305 + }, + { + "epoch": 1.41, + "learning_rate": 1.1774331059882676e-05, + "loss": 1.5275, + "step": 1306 + }, + { + "epoch": 1.41, + "learning_rate": 1.1762587511125738e-05, + "loss": 1.7395, + "step": 1307 + }, + { + "epoch": 1.42, + "learning_rate": 1.1750841453091464e-05, + "loss": 1.4839, + "step": 1308 + }, + { + "epoch": 1.42, + "learning_rate": 1.1739092902501927e-05, + "loss": 1.5222, + "step": 1309 + }, + { + "epoch": 1.42, + "learning_rate": 1.1727341876082748e-05, + "loss": 1.5207, + "step": 1310 + }, + { + "epoch": 1.42, + "learning_rate": 1.1715588390563069e-05, + "loss": 1.2958, + "step": 1311 + }, + { + "epoch": 1.42, + "learning_rate": 1.1703832462675544e-05, + "loss": 1.8622, + "step": 1312 + }, + { + "epoch": 1.42, + "learning_rate": 1.1692074109156292e-05, + "loss": 1.8553, + "step": 1313 + }, + { + "epoch": 1.42, + "learning_rate": 1.1680313346744897e-05, + "loss": 1.7046, + "step": 1314 + }, + { + "epoch": 1.42, + "learning_rate": 1.1668550192184358e-05, + "loss": 1.5634, + "step": 1315 + }, + { + "epoch": 1.42, + "learning_rate": 1.165678466222109e-05, + "loss": 1.4167, + "step": 1316 + }, + { + "epoch": 1.43, + "learning_rate": 1.164501677360489e-05, + "loss": 1.4836, + "step": 1317 + }, + { + "epoch": 1.43, + "learning_rate": 1.1633246543088903e-05, + "loss": 1.4215, + "step": 1318 + }, + { + "epoch": 1.43, + "learning_rate": 1.162147398742962e-05, + "loss": 1.4499, + "step": 1319 + }, + { + "epoch": 1.43, + "learning_rate": 1.160969912338684e-05, + "loss": 1.6927, + "step": 1320 + }, + { + "epoch": 1.43, + "learning_rate": 1.1597921967723638e-05, + "loss": 1.6764, + "step": 1321 + }, + { + "epoch": 1.43, + "learning_rate": 1.1586142537206365e-05, + "loss": 1.3818, + "step": 1322 + }, + { + "epoch": 1.43, + "learning_rate": 1.15743608486046e-05, + "loss": 1.5537, + "step": 1323 + }, + { + "epoch": 1.43, + "learning_rate": 1.1562576918691141e-05, + "loss": 1.7134, + "step": 1324 + }, + { + "epoch": 1.43, + "learning_rate": 1.1550790764241979e-05, + "loss": 1.2854, + "step": 1325 + }, + { + "epoch": 1.44, + "learning_rate": 1.153900240203627e-05, + "loss": 1.4848, + "step": 1326 + }, + { + "epoch": 1.44, + "learning_rate": 1.152721184885631e-05, + "loss": 1.448, + "step": 1327 + }, + { + "epoch": 1.44, + "learning_rate": 1.1515419121487522e-05, + "loss": 1.7475, + "step": 1328 + }, + { + "epoch": 1.44, + "learning_rate": 1.150362423671841e-05, + "loss": 1.4458, + "step": 1329 + }, + { + "epoch": 1.44, + "learning_rate": 1.1491827211340568e-05, + "loss": 1.3791, + "step": 1330 + }, + { + "epoch": 1.44, + "learning_rate": 1.1480028062148622e-05, + "loss": 1.6132, + "step": 1331 + }, + { + "epoch": 1.44, + "learning_rate": 1.1468226805940227e-05, + "loss": 1.4072, + "step": 1332 + }, + { + "epoch": 1.44, + "learning_rate": 1.1456423459516047e-05, + "loss": 1.5475, + "step": 1333 + }, + { + "epoch": 1.44, + "learning_rate": 1.1444618039679702e-05, + "loss": 1.5749, + "step": 1334 + }, + { + "epoch": 1.44, + "learning_rate": 1.143281056323778e-05, + "loss": 1.7015, + "step": 1335 + }, + { + "epoch": 1.45, + "learning_rate": 1.1421001046999787e-05, + "loss": 1.4643, + "step": 1336 + }, + { + "epoch": 1.45, + "learning_rate": 1.1409189507778143e-05, + "loss": 1.5289, + "step": 1337 + }, + { + "epoch": 1.45, + "learning_rate": 1.1397375962388137e-05, + "loss": 1.7965, + "step": 1338 + }, + { + "epoch": 1.45, + "learning_rate": 1.1385560427647923e-05, + "loss": 1.617, + "step": 1339 + }, + { + "epoch": 1.45, + "learning_rate": 1.1373742920378483e-05, + "loss": 1.5381, + "step": 1340 + }, + { + "epoch": 1.45, + "learning_rate": 1.1361923457403607e-05, + "loss": 1.6884, + "step": 1341 + }, + { + "epoch": 1.45, + "learning_rate": 1.1350102055549868e-05, + "loss": 1.4769, + "step": 1342 + }, + { + "epoch": 1.45, + "learning_rate": 1.1338278731646603e-05, + "loss": 1.6484, + "step": 1343 + }, + { + "epoch": 1.45, + "learning_rate": 1.1326453502525886e-05, + "loss": 1.5862, + "step": 1344 + }, + { + "epoch": 1.46, + "learning_rate": 1.1314626385022493e-05, + "loss": 1.4017, + "step": 1345 + }, + { + "epoch": 1.46, + "learning_rate": 1.1302797395973906e-05, + "loss": 1.5226, + "step": 1346 + }, + { + "epoch": 1.46, + "learning_rate": 1.1290966552220253e-05, + "loss": 1.5853, + "step": 1347 + }, + { + "epoch": 1.46, + "learning_rate": 1.1279133870604313e-05, + "loss": 1.6298, + "step": 1348 + }, + { + "epoch": 1.46, + "learning_rate": 1.1267299367971482e-05, + "loss": 1.7236, + "step": 1349 + }, + { + "epoch": 1.46, + "learning_rate": 1.1255463061169744e-05, + "loss": 1.5666, + "step": 1350 + }, + { + "epoch": 1.46, + "learning_rate": 1.1243624967049653e-05, + "loss": 1.6452, + "step": 1351 + }, + { + "epoch": 1.46, + "learning_rate": 1.1231785102464307e-05, + "loss": 1.5262, + "step": 1352 + }, + { + "epoch": 1.46, + "learning_rate": 1.1219943484269329e-05, + "loss": 1.8981, + "step": 1353 + }, + { + "epoch": 1.47, + "learning_rate": 1.1208100129322827e-05, + "loss": 1.8015, + "step": 1354 + }, + { + "epoch": 1.47, + "learning_rate": 1.1196255054485396e-05, + "loss": 1.7094, + "step": 1355 + }, + { + "epoch": 1.47, + "learning_rate": 1.1184408276620074e-05, + "loss": 1.8474, + "step": 1356 + }, + { + "epoch": 1.47, + "learning_rate": 1.1172559812592316e-05, + "loss": 1.4666, + "step": 1357 + }, + { + "epoch": 1.47, + "learning_rate": 1.1160709679269986e-05, + "loss": 1.5856, + "step": 1358 + }, + { + "epoch": 1.47, + "learning_rate": 1.114885789352332e-05, + "loss": 1.7492, + "step": 1359 + }, + { + "epoch": 1.47, + "learning_rate": 1.1137004472224915e-05, + "loss": 1.6024, + "step": 1360 + }, + { + "epoch": 1.47, + "learning_rate": 1.1125149432249687e-05, + "loss": 1.6389, + "step": 1361 + }, + { + "epoch": 1.47, + "learning_rate": 1.1113292790474857e-05, + "loss": 1.4293, + "step": 1362 + }, + { + "epoch": 1.48, + "learning_rate": 1.1101434563779929e-05, + "loss": 1.8504, + "step": 1363 + }, + { + "epoch": 1.48, + "learning_rate": 1.1089574769046665e-05, + "loss": 1.6104, + "step": 1364 + }, + { + "epoch": 1.48, + "learning_rate": 1.1077713423159056e-05, + "loss": 1.5039, + "step": 1365 + }, + { + "epoch": 1.48, + "learning_rate": 1.1065850543003302e-05, + "loss": 1.5566, + "step": 1366 + }, + { + "epoch": 1.48, + "learning_rate": 1.1053986145467795e-05, + "loss": 1.4938, + "step": 1367 + }, + { + "epoch": 1.48, + "learning_rate": 1.104212024744307e-05, + "loss": 1.6359, + "step": 1368 + }, + { + "epoch": 1.48, + "learning_rate": 1.1030252865821814e-05, + "loss": 1.5906, + "step": 1369 + }, + { + "epoch": 1.48, + "learning_rate": 1.1018384017498817e-05, + "loss": 1.6035, + "step": 1370 + }, + { + "epoch": 1.48, + "learning_rate": 1.1006513719370963e-05, + "loss": 1.7358, + "step": 1371 + }, + { + "epoch": 1.48, + "learning_rate": 1.09946419883372e-05, + "loss": 1.7031, + "step": 1372 + }, + { + "epoch": 1.49, + "learning_rate": 1.0982768841298505e-05, + "loss": 1.6675, + "step": 1373 + }, + { + "epoch": 1.49, + "learning_rate": 1.0970894295157887e-05, + "loss": 1.7351, + "step": 1374 + }, + { + "epoch": 1.49, + "learning_rate": 1.0959018366820337e-05, + "loss": 1.5233, + "step": 1375 + }, + { + "epoch": 1.49, + "learning_rate": 1.0947141073192813e-05, + "loss": 1.3807, + "step": 1376 + }, + { + "epoch": 1.49, + "learning_rate": 1.0935262431184224e-05, + "loss": 1.6437, + "step": 1377 + }, + { + "epoch": 1.49, + "learning_rate": 1.092338245770539e-05, + "loss": 1.5475, + "step": 1378 + }, + { + "epoch": 1.49, + "learning_rate": 1.0911501169669032e-05, + "loss": 1.7788, + "step": 1379 + }, + { + "epoch": 1.49, + "learning_rate": 1.089961858398974e-05, + "loss": 1.6773, + "step": 1380 + }, + { + "epoch": 1.49, + "learning_rate": 1.0887734717583953e-05, + "loss": 1.4647, + "step": 1381 + }, + { + "epoch": 1.5, + "learning_rate": 1.0875849587369931e-05, + "loss": 1.4252, + "step": 1382 + }, + { + "epoch": 1.5, + "learning_rate": 1.086396321026774e-05, + "loss": 1.5415, + "step": 1383 + }, + { + "epoch": 1.5, + "learning_rate": 1.0852075603199213e-05, + "loss": 1.4866, + "step": 1384 + }, + { + "epoch": 1.5, + "learning_rate": 1.0840186783087936e-05, + "loss": 1.3765, + "step": 1385 + }, + { + "epoch": 1.5, + "learning_rate": 1.0828296766859224e-05, + "loss": 1.6217, + "step": 1386 + }, + { + "epoch": 1.5, + "learning_rate": 1.0816405571440089e-05, + "loss": 1.6645, + "step": 1387 + }, + { + "epoch": 1.5, + "learning_rate": 1.0804513213759234e-05, + "loss": 1.3922, + "step": 1388 + }, + { + "epoch": 1.5, + "learning_rate": 1.0792619710747004e-05, + "loss": 1.5749, + "step": 1389 + }, + { + "epoch": 1.5, + "learning_rate": 1.0780725079335383e-05, + "loss": 1.5204, + "step": 1390 + }, + { + "epoch": 1.51, + "learning_rate": 1.0768829336457958e-05, + "loss": 1.2803, + "step": 1391 + }, + { + "epoch": 1.51, + "learning_rate": 1.0756932499049894e-05, + "loss": 1.6384, + "step": 1392 + }, + { + "epoch": 1.51, + "learning_rate": 1.074503458404792e-05, + "loss": 1.4258, + "step": 1393 + }, + { + "epoch": 1.51, + "learning_rate": 1.07331356083903e-05, + "loss": 1.4932, + "step": 1394 + }, + { + "epoch": 1.51, + "learning_rate": 1.0721235589016805e-05, + "loss": 1.5761, + "step": 1395 + }, + { + "epoch": 1.51, + "learning_rate": 1.0709334542868692e-05, + "loss": 1.8186, + "step": 1396 + }, + { + "epoch": 1.51, + "learning_rate": 1.0697432486888681e-05, + "loss": 1.5917, + "step": 1397 + }, + { + "epoch": 1.51, + "learning_rate": 1.068552943802093e-05, + "loss": 1.7973, + "step": 1398 + }, + { + "epoch": 1.51, + "learning_rate": 1.0673625413211002e-05, + "loss": 1.5501, + "step": 1399 + }, + { + "epoch": 1.52, + "learning_rate": 1.0661720429405866e-05, + "loss": 1.5497, + "step": 1400 + }, + { + "epoch": 1.52, + "learning_rate": 1.0649814503553844e-05, + "loss": 1.4764, + "step": 1401 + }, + { + "epoch": 1.52, + "learning_rate": 1.06379076526046e-05, + "loss": 1.4711, + "step": 1402 + }, + { + "epoch": 1.52, + "learning_rate": 1.0625999893509122e-05, + "loss": 1.713, + "step": 1403 + }, + { + "epoch": 1.52, + "learning_rate": 1.0614091243219682e-05, + "loss": 1.5149, + "step": 1404 + }, + { + "epoch": 1.52, + "learning_rate": 1.0602181718689822e-05, + "loss": 1.4376, + "step": 1405 + }, + { + "epoch": 1.52, + "learning_rate": 1.0590271336874339e-05, + "loss": 1.6151, + "step": 1406 + }, + { + "epoch": 1.52, + "learning_rate": 1.0578360114729236e-05, + "loss": 1.4859, + "step": 1407 + }, + { + "epoch": 1.52, + "learning_rate": 1.0566448069211723e-05, + "loss": 1.3853, + "step": 1408 + }, + { + "epoch": 1.52, + "learning_rate": 1.0554535217280178e-05, + "loss": 1.7395, + "step": 1409 + }, + { + "epoch": 1.53, + "learning_rate": 1.0542621575894125e-05, + "loss": 1.4653, + "step": 1410 + }, + { + "epoch": 1.53, + "learning_rate": 1.0530707162014218e-05, + "loss": 1.6122, + "step": 1411 + }, + { + "epoch": 1.53, + "learning_rate": 1.0518791992602204e-05, + "loss": 1.6813, + "step": 1412 + }, + { + "epoch": 1.53, + "learning_rate": 1.0506876084620911e-05, + "loss": 1.5162, + "step": 1413 + }, + { + "epoch": 1.53, + "learning_rate": 1.0494959455034215e-05, + "loss": 1.5368, + "step": 1414 + }, + { + "epoch": 1.53, + "learning_rate": 1.0483042120807016e-05, + "loss": 1.6324, + "step": 1415 + }, + { + "epoch": 1.53, + "learning_rate": 1.0471124098905228e-05, + "loss": 1.4531, + "step": 1416 + }, + { + "epoch": 1.53, + "learning_rate": 1.0459205406295737e-05, + "loss": 1.5531, + "step": 1417 + }, + { + "epoch": 1.53, + "learning_rate": 1.044728605994638e-05, + "loss": 1.5739, + "step": 1418 + }, + { + "epoch": 1.54, + "learning_rate": 1.0435366076825933e-05, + "loss": 1.7043, + "step": 1419 + }, + { + "epoch": 1.54, + "learning_rate": 1.0423445473904072e-05, + "loss": 1.6949, + "step": 1420 + }, + { + "epoch": 1.54, + "learning_rate": 1.0411524268151359e-05, + "loss": 1.5442, + "step": 1421 + }, + { + "epoch": 1.54, + "learning_rate": 1.039960247653921e-05, + "loss": 1.642, + "step": 1422 + }, + { + "epoch": 1.54, + "learning_rate": 1.0387680116039884e-05, + "loss": 1.4562, + "step": 1423 + }, + { + "epoch": 1.54, + "learning_rate": 1.0375757203626445e-05, + "loss": 1.5614, + "step": 1424 + }, + { + "epoch": 1.54, + "learning_rate": 1.0363833756272734e-05, + "loss": 1.5004, + "step": 1425 + }, + { + "epoch": 1.54, + "learning_rate": 1.0351909790953367e-05, + "loss": 1.6549, + "step": 1426 + }, + { + "epoch": 1.54, + "learning_rate": 1.0339985324643688e-05, + "loss": 1.6174, + "step": 1427 + }, + { + "epoch": 1.55, + "learning_rate": 1.0328060374319759e-05, + "loss": 1.65, + "step": 1428 + }, + { + "epoch": 1.55, + "learning_rate": 1.0316134956958334e-05, + "loss": 1.7216, + "step": 1429 + }, + { + "epoch": 1.55, + "learning_rate": 1.0304209089536827e-05, + "loss": 1.6205, + "step": 1430 + }, + { + "epoch": 1.55, + "learning_rate": 1.0292282789033292e-05, + "loss": 1.6978, + "step": 1431 + }, + { + "epoch": 1.55, + "learning_rate": 1.0280356072426398e-05, + "loss": 1.6674, + "step": 1432 + }, + { + "epoch": 1.55, + "learning_rate": 1.0268428956695413e-05, + "loss": 1.5286, + "step": 1433 + }, + { + "epoch": 1.55, + "learning_rate": 1.0256501458820172e-05, + "loss": 1.6045, + "step": 1434 + }, + { + "epoch": 1.55, + "learning_rate": 1.0244573595781045e-05, + "loss": 1.5994, + "step": 1435 + }, + { + "epoch": 1.55, + "learning_rate": 1.0232645384558935e-05, + "loss": 1.6663, + "step": 1436 + }, + { + "epoch": 1.56, + "learning_rate": 1.0220716842135234e-05, + "loss": 1.4722, + "step": 1437 + }, + { + "epoch": 1.56, + "learning_rate": 1.0208787985491801e-05, + "loss": 1.5185, + "step": 1438 + }, + { + "epoch": 1.56, + "learning_rate": 1.0196858831610951e-05, + "loss": 1.4893, + "step": 1439 + }, + { + "epoch": 1.56, + "learning_rate": 1.018492939747542e-05, + "loss": 1.5822, + "step": 1440 + }, + { + "epoch": 1.56, + "learning_rate": 1.0172999700068338e-05, + "loss": 1.4596, + "step": 1441 + }, + { + "epoch": 1.56, + "learning_rate": 1.0161069756373212e-05, + "loss": 1.6666, + "step": 1442 + }, + { + "epoch": 1.56, + "learning_rate": 1.0149139583373906e-05, + "loss": 1.6678, + "step": 1443 + }, + { + "epoch": 1.56, + "learning_rate": 1.01372091980546e-05, + "loss": 1.535, + "step": 1444 + }, + { + "epoch": 1.56, + "learning_rate": 1.0125278617399784e-05, + "loss": 1.491, + "step": 1445 + }, + { + "epoch": 1.56, + "learning_rate": 1.0113347858394223e-05, + "loss": 1.7168, + "step": 1446 + }, + { + "epoch": 1.57, + "learning_rate": 1.0101416938022936e-05, + "loss": 1.6208, + "step": 1447 + }, + { + "epoch": 1.57, + "learning_rate": 1.0089485873271176e-05, + "loss": 1.3264, + "step": 1448 + }, + { + "epoch": 1.57, + "learning_rate": 1.0077554681124391e-05, + "loss": 1.5177, + "step": 1449 + }, + { + "epoch": 1.57, + "learning_rate": 1.0065623378568223e-05, + "loss": 1.8232, + "step": 1450 + }, + { + "epoch": 1.57, + "learning_rate": 1.005369198258846e-05, + "loss": 1.5726, + "step": 1451 + }, + { + "epoch": 1.57, + "learning_rate": 1.0041760510171032e-05, + "loss": 1.8903, + "step": 1452 + }, + { + "epoch": 1.57, + "learning_rate": 1.0029828978301977e-05, + "loss": 1.6815, + "step": 1453 + }, + { + "epoch": 1.57, + "learning_rate": 1.0017897403967408e-05, + "loss": 1.6521, + "step": 1454 + }, + { + "epoch": 1.57, + "learning_rate": 1.0005965804153508e-05, + "loss": 1.4628, + "step": 1455 + }, + { + "epoch": 1.58, + "learning_rate": 9.994034195846495e-06, + "loss": 1.4936, + "step": 1456 + }, + { + "epoch": 1.58, + "learning_rate": 9.982102596032597e-06, + "loss": 1.6062, + "step": 1457 + }, + { + "epoch": 1.58, + "learning_rate": 9.970171021698027e-06, + "loss": 1.5715, + "step": 1458 + }, + { + "epoch": 1.58, + "learning_rate": 9.958239489828968e-06, + "loss": 1.3657, + "step": 1459 + }, + { + "epoch": 1.58, + "learning_rate": 9.946308017411543e-06, + "loss": 1.6497, + "step": 1460 + }, + { + "epoch": 1.58, + "learning_rate": 9.93437662143178e-06, + "loss": 1.6405, + "step": 1461 + }, + { + "epoch": 1.58, + "learning_rate": 9.922445318875612e-06, + "loss": 1.3632, + "step": 1462 + }, + { + "epoch": 1.58, + "learning_rate": 9.910514126728827e-06, + "loss": 1.4618, + "step": 1463 + }, + { + "epoch": 1.58, + "learning_rate": 9.898583061977068e-06, + "loss": 1.7279, + "step": 1464 + }, + { + "epoch": 1.59, + "learning_rate": 9.88665214160578e-06, + "loss": 1.5738, + "step": 1465 + }, + { + "epoch": 1.59, + "learning_rate": 9.874721382600218e-06, + "loss": 1.8553, + "step": 1466 + }, + { + "epoch": 1.59, + "learning_rate": 9.862790801945403e-06, + "loss": 1.4768, + "step": 1467 + }, + { + "epoch": 1.59, + "learning_rate": 9.850860416626096e-06, + "loss": 1.5988, + "step": 1468 + }, + { + "epoch": 1.59, + "learning_rate": 9.838930243626791e-06, + "loss": 1.6183, + "step": 1469 + }, + { + "epoch": 1.59, + "learning_rate": 9.827000299931666e-06, + "loss": 1.5499, + "step": 1470 + }, + { + "epoch": 1.59, + "learning_rate": 9.815070602524586e-06, + "loss": 1.5744, + "step": 1471 + }, + { + "epoch": 1.59, + "learning_rate": 9.80314116838905e-06, + "loss": 1.4634, + "step": 1472 + }, + { + "epoch": 1.59, + "learning_rate": 9.7912120145082e-06, + "loss": 1.4936, + "step": 1473 + }, + { + "epoch": 1.6, + "learning_rate": 9.77928315786477e-06, + "loss": 1.7769, + "step": 1474 + }, + { + "epoch": 1.6, + "learning_rate": 9.767354615441066e-06, + "loss": 1.626, + "step": 1475 + }, + { + "epoch": 1.6, + "learning_rate": 9.755426404218958e-06, + "loss": 1.6154, + "step": 1476 + }, + { + "epoch": 1.6, + "learning_rate": 9.743498541179832e-06, + "loss": 2.0258, + "step": 1477 + }, + { + "epoch": 1.6, + "learning_rate": 9.731571043304589e-06, + "loss": 1.7951, + "step": 1478 + }, + { + "epoch": 1.6, + "learning_rate": 9.719643927573604e-06, + "loss": 1.3674, + "step": 1479 + }, + { + "epoch": 1.6, + "learning_rate": 9.70771721096671e-06, + "loss": 1.7213, + "step": 1480 + }, + { + "epoch": 1.6, + "learning_rate": 9.695790910463176e-06, + "loss": 1.8304, + "step": 1481 + }, + { + "epoch": 1.6, + "learning_rate": 9.683865043041664e-06, + "loss": 1.5519, + "step": 1482 + }, + { + "epoch": 1.6, + "learning_rate": 9.671939625680243e-06, + "loss": 1.6132, + "step": 1483 + }, + { + "epoch": 1.61, + "learning_rate": 9.660014675356315e-06, + "loss": 1.5612, + "step": 1484 + }, + { + "epoch": 1.61, + "learning_rate": 9.648090209046638e-06, + "loss": 1.5789, + "step": 1485 + }, + { + "epoch": 1.61, + "learning_rate": 9.636166243727269e-06, + "loss": 1.5749, + "step": 1486 + }, + { + "epoch": 1.61, + "learning_rate": 9.624242796373557e-06, + "loss": 1.641, + "step": 1487 + }, + { + "epoch": 1.61, + "learning_rate": 9.612319883960117e-06, + "loss": 1.5902, + "step": 1488 + }, + { + "epoch": 1.61, + "learning_rate": 9.60039752346079e-06, + "loss": 1.7535, + "step": 1489 + }, + { + "epoch": 1.61, + "learning_rate": 9.588475731848645e-06, + "loss": 1.5461, + "step": 1490 + }, + { + "epoch": 1.61, + "learning_rate": 9.576554526095931e-06, + "loss": 1.8523, + "step": 1491 + }, + { + "epoch": 1.61, + "learning_rate": 9.564633923174072e-06, + "loss": 1.739, + "step": 1492 + }, + { + "epoch": 1.62, + "learning_rate": 9.552713940053622e-06, + "loss": 1.4078, + "step": 1493 + }, + { + "epoch": 1.62, + "learning_rate": 9.540794593704265e-06, + "loss": 1.4988, + "step": 1494 + }, + { + "epoch": 1.62, + "learning_rate": 9.528875901094775e-06, + "loss": 1.371, + "step": 1495 + }, + { + "epoch": 1.62, + "learning_rate": 9.516957879192987e-06, + "loss": 1.742, + "step": 1496 + }, + { + "epoch": 1.62, + "learning_rate": 9.50504054496579e-06, + "loss": 1.8256, + "step": 1497 + }, + { + "epoch": 1.62, + "learning_rate": 9.49312391537909e-06, + "loss": 1.2345, + "step": 1498 + }, + { + "epoch": 1.62, + "learning_rate": 9.4812080073978e-06, + "loss": 1.4356, + "step": 1499 + }, + { + "epoch": 1.62, + "learning_rate": 9.469292837985786e-06, + "loss": 1.4514, + "step": 1500 + }, + { + "epoch": 1.62, + "learning_rate": 9.457378424105875e-06, + "loss": 1.6653, + "step": 1501 + }, + { + "epoch": 1.63, + "learning_rate": 9.445464782719824e-06, + "loss": 1.4308, + "step": 1502 + }, + { + "epoch": 1.63, + "learning_rate": 9.433551930788278e-06, + "loss": 1.5736, + "step": 1503 + }, + { + "epoch": 1.63, + "learning_rate": 9.421639885270769e-06, + "loss": 1.6293, + "step": 1504 + }, + { + "epoch": 1.63, + "learning_rate": 9.409728663125664e-06, + "loss": 1.529, + "step": 1505 + }, + { + "epoch": 1.63, + "learning_rate": 9.397818281310182e-06, + "loss": 1.4393, + "step": 1506 + }, + { + "epoch": 1.63, + "learning_rate": 9.385908756780323e-06, + "loss": 1.6974, + "step": 1507 + }, + { + "epoch": 1.63, + "learning_rate": 9.374000106490882e-06, + "loss": 1.33, + "step": 1508 + }, + { + "epoch": 1.63, + "learning_rate": 9.362092347395401e-06, + "loss": 1.3257, + "step": 1509 + }, + { + "epoch": 1.63, + "learning_rate": 9.350185496446157e-06, + "loss": 1.6084, + "step": 1510 + }, + { + "epoch": 1.64, + "learning_rate": 9.338279570594137e-06, + "loss": 1.5949, + "step": 1511 + }, + { + "epoch": 1.64, + "learning_rate": 9.326374586789e-06, + "loss": 1.4288, + "step": 1512 + }, + { + "epoch": 1.64, + "learning_rate": 9.314470561979076e-06, + "loss": 1.6462, + "step": 1513 + }, + { + "epoch": 1.64, + "learning_rate": 9.302567513111322e-06, + "loss": 1.6326, + "step": 1514 + }, + { + "epoch": 1.64, + "learning_rate": 9.290665457131308e-06, + "loss": 1.5415, + "step": 1515 + }, + { + "epoch": 1.64, + "learning_rate": 9.278764410983198e-06, + "loss": 1.5942, + "step": 1516 + }, + { + "epoch": 1.64, + "learning_rate": 9.266864391609702e-06, + "loss": 1.489, + "step": 1517 + }, + { + "epoch": 1.64, + "learning_rate": 9.254965415952083e-06, + "loss": 1.6327, + "step": 1518 + }, + { + "epoch": 1.64, + "learning_rate": 9.243067500950109e-06, + "loss": 1.6492, + "step": 1519 + }, + { + "epoch": 1.65, + "learning_rate": 9.231170663542048e-06, + "loss": 1.6279, + "step": 1520 + }, + { + "epoch": 1.65, + "learning_rate": 9.219274920664619e-06, + "loss": 1.6027, + "step": 1521 + }, + { + "epoch": 1.65, + "learning_rate": 9.207380289252996e-06, + "loss": 1.8429, + "step": 1522 + }, + { + "epoch": 1.65, + "learning_rate": 9.195486786240771e-06, + "loss": 1.5398, + "step": 1523 + }, + { + "epoch": 1.65, + "learning_rate": 9.183594428559913e-06, + "loss": 1.6267, + "step": 1524 + }, + { + "epoch": 1.65, + "learning_rate": 9.171703233140781e-06, + "loss": 1.7189, + "step": 1525 + }, + { + "epoch": 1.65, + "learning_rate": 9.159813216912067e-06, + "loss": 1.758, + "step": 1526 + }, + { + "epoch": 1.65, + "learning_rate": 9.147924396800792e-06, + "loss": 1.5452, + "step": 1527 + }, + { + "epoch": 1.65, + "learning_rate": 9.136036789732261e-06, + "loss": 1.5664, + "step": 1528 + }, + { + "epoch": 1.65, + "learning_rate": 9.124150412630069e-06, + "loss": 1.761, + "step": 1529 + }, + { + "epoch": 1.66, + "learning_rate": 9.11226528241605e-06, + "loss": 1.4514, + "step": 1530 + }, + { + "epoch": 1.66, + "learning_rate": 9.100381416010262e-06, + "loss": 1.4175, + "step": 1531 + }, + { + "epoch": 1.66, + "learning_rate": 9.088498830330974e-06, + "loss": 1.7212, + "step": 1532 + }, + { + "epoch": 1.66, + "learning_rate": 9.076617542294613e-06, + "loss": 1.5689, + "step": 1533 + }, + { + "epoch": 1.66, + "learning_rate": 9.064737568815783e-06, + "loss": 1.6351, + "step": 1534 + }, + { + "epoch": 1.66, + "learning_rate": 9.052858926807189e-06, + "loss": 1.569, + "step": 1535 + }, + { + "epoch": 1.66, + "learning_rate": 9.040981633179666e-06, + "loss": 1.5106, + "step": 1536 + }, + { + "epoch": 1.66, + "learning_rate": 9.029105704842114e-06, + "loss": 1.3715, + "step": 1537 + }, + { + "epoch": 1.66, + "learning_rate": 9.017231158701494e-06, + "loss": 1.5932, + "step": 1538 + }, + { + "epoch": 1.67, + "learning_rate": 9.005358011662805e-06, + "loss": 1.754, + "step": 1539 + }, + { + "epoch": 1.67, + "learning_rate": 8.993486280629039e-06, + "loss": 1.7342, + "step": 1540 + }, + { + "epoch": 1.67, + "learning_rate": 8.981615982501186e-06, + "loss": 1.8086, + "step": 1541 + }, + { + "epoch": 1.67, + "learning_rate": 8.96974713417819e-06, + "loss": 1.4102, + "step": 1542 + }, + { + "epoch": 1.67, + "learning_rate": 8.95787975255693e-06, + "loss": 1.6458, + "step": 1543 + }, + { + "epoch": 1.67, + "learning_rate": 8.94601385453221e-06, + "loss": 1.451, + "step": 1544 + }, + { + "epoch": 1.67, + "learning_rate": 8.934149456996696e-06, + "loss": 1.5115, + "step": 1545 + }, + { + "epoch": 1.67, + "learning_rate": 8.922286576840948e-06, + "loss": 1.7639, + "step": 1546 + }, + { + "epoch": 1.67, + "learning_rate": 8.910425230953339e-06, + "loss": 1.4725, + "step": 1547 + }, + { + "epoch": 1.68, + "learning_rate": 8.898565436220076e-06, + "loss": 1.5253, + "step": 1548 + }, + { + "epoch": 1.68, + "learning_rate": 8.886707209525148e-06, + "loss": 1.666, + "step": 1549 + }, + { + "epoch": 1.68, + "learning_rate": 8.874850567750315e-06, + "loss": 1.2424, + "step": 1550 + }, + { + "epoch": 1.68, + "learning_rate": 8.862995527775089e-06, + "loss": 1.428, + "step": 1551 + }, + { + "epoch": 1.68, + "learning_rate": 8.85114210647668e-06, + "loss": 1.2813, + "step": 1552 + }, + { + "epoch": 1.68, + "learning_rate": 8.839290320730018e-06, + "loss": 1.4706, + "step": 1553 + }, + { + "epoch": 1.68, + "learning_rate": 8.827440187407687e-06, + "loss": 1.7242, + "step": 1554 + }, + { + "epoch": 1.68, + "learning_rate": 8.815591723379931e-06, + "loss": 1.5579, + "step": 1555 + }, + { + "epoch": 1.68, + "learning_rate": 8.803744945514606e-06, + "loss": 1.551, + "step": 1556 + }, + { + "epoch": 1.69, + "learning_rate": 8.791899870677174e-06, + "loss": 1.331, + "step": 1557 + }, + { + "epoch": 1.69, + "learning_rate": 8.780056515730674e-06, + "loss": 1.7178, + "step": 1558 + }, + { + "epoch": 1.69, + "learning_rate": 8.768214897535693e-06, + "loss": 1.5405, + "step": 1559 + }, + { + "epoch": 1.69, + "learning_rate": 8.75637503295035e-06, + "loss": 1.6926, + "step": 1560 + }, + { + "epoch": 1.69, + "learning_rate": 8.744536938830257e-06, + "loss": 1.6215, + "step": 1561 + }, + { + "epoch": 1.69, + "learning_rate": 8.732700632028523e-06, + "loss": 1.4487, + "step": 1562 + }, + { + "epoch": 1.69, + "learning_rate": 8.720866129395689e-06, + "loss": 1.4904, + "step": 1563 + }, + { + "epoch": 1.69, + "learning_rate": 8.709033447779749e-06, + "loss": 1.3806, + "step": 1564 + }, + { + "epoch": 1.69, + "learning_rate": 8.697202604026099e-06, + "loss": 1.5946, + "step": 1565 + }, + { + "epoch": 1.69, + "learning_rate": 8.685373614977507e-06, + "loss": 1.7393, + "step": 1566 + }, + { + "epoch": 1.7, + "learning_rate": 8.673546497474119e-06, + "loss": 1.6652, + "step": 1567 + }, + { + "epoch": 1.7, + "learning_rate": 8.661721268353398e-06, + "loss": 1.6665, + "step": 1568 + }, + { + "epoch": 1.7, + "learning_rate": 8.649897944450134e-06, + "loss": 1.4041, + "step": 1569 + }, + { + "epoch": 1.7, + "learning_rate": 8.638076542596396e-06, + "loss": 1.8183, + "step": 1570 + }, + { + "epoch": 1.7, + "learning_rate": 8.626257079621517e-06, + "loss": 1.4689, + "step": 1571 + }, + { + "epoch": 1.7, + "learning_rate": 8.61443957235208e-06, + "loss": 1.4563, + "step": 1572 + }, + { + "epoch": 1.7, + "learning_rate": 8.602624037611865e-06, + "loss": 1.8297, + "step": 1573 + }, + { + "epoch": 1.7, + "learning_rate": 8.590810492221862e-06, + "loss": 1.4407, + "step": 1574 + }, + { + "epoch": 1.7, + "learning_rate": 8.578998953000218e-06, + "loss": 1.5484, + "step": 1575 + }, + { + "epoch": 1.71, + "learning_rate": 8.567189436762227e-06, + "loss": 1.57, + "step": 1576 + }, + { + "epoch": 1.71, + "learning_rate": 8.555381960320301e-06, + "loss": 1.5272, + "step": 1577 + }, + { + "epoch": 1.71, + "learning_rate": 8.543576540483955e-06, + "loss": 1.7057, + "step": 1578 + }, + { + "epoch": 1.71, + "learning_rate": 8.531773194059774e-06, + "loss": 1.5611, + "step": 1579 + }, + { + "epoch": 1.71, + "learning_rate": 8.51997193785138e-06, + "loss": 1.3945, + "step": 1580 + }, + { + "epoch": 1.71, + "learning_rate": 8.508172788659436e-06, + "loss": 1.7305, + "step": 1581 + }, + { + "epoch": 1.71, + "learning_rate": 8.496375763281591e-06, + "loss": 1.6835, + "step": 1582 + }, + { + "epoch": 1.71, + "learning_rate": 8.484580878512485e-06, + "loss": 1.5178, + "step": 1583 + }, + { + "epoch": 1.71, + "learning_rate": 8.472788151143692e-06, + "loss": 1.6548, + "step": 1584 + }, + { + "epoch": 1.72, + "learning_rate": 8.460997597963732e-06, + "loss": 1.6223, + "step": 1585 + }, + { + "epoch": 1.72, + "learning_rate": 8.449209235758024e-06, + "loss": 1.441, + "step": 1586 + }, + { + "epoch": 1.72, + "learning_rate": 8.437423081308862e-06, + "loss": 1.6071, + "step": 1587 + }, + { + "epoch": 1.72, + "learning_rate": 8.425639151395406e-06, + "loss": 1.6885, + "step": 1588 + }, + { + "epoch": 1.72, + "learning_rate": 8.413857462793637e-06, + "loss": 1.6291, + "step": 1589 + }, + { + "epoch": 1.72, + "learning_rate": 8.402078032276367e-06, + "loss": 1.4595, + "step": 1590 + }, + { + "epoch": 1.72, + "learning_rate": 8.390300876613164e-06, + "loss": 1.7373, + "step": 1591 + }, + { + "epoch": 1.72, + "learning_rate": 8.378526012570381e-06, + "loss": 1.5518, + "step": 1592 + }, + { + "epoch": 1.72, + "learning_rate": 8.366753456911099e-06, + "loss": 1.7596, + "step": 1593 + }, + { + "epoch": 1.73, + "learning_rate": 8.354983226395114e-06, + "loss": 1.4498, + "step": 1594 + }, + { + "epoch": 1.73, + "learning_rate": 8.343215337778915e-06, + "loss": 1.4796, + "step": 1595 + }, + { + "epoch": 1.73, + "learning_rate": 8.331449807815646e-06, + "loss": 1.5796, + "step": 1596 + }, + { + "epoch": 1.73, + "learning_rate": 8.31968665325511e-06, + "loss": 1.438, + "step": 1597 + }, + { + "epoch": 1.73, + "learning_rate": 8.307925890843711e-06, + "loss": 1.5338, + "step": 1598 + }, + { + "epoch": 1.73, + "learning_rate": 8.296167537324459e-06, + "loss": 1.576, + "step": 1599 + }, + { + "epoch": 1.73, + "learning_rate": 8.284411609436933e-06, + "loss": 1.6412, + "step": 1600 + }, + { + "epoch": 1.73, + "learning_rate": 8.272658123917256e-06, + "loss": 1.297, + "step": 1601 + }, + { + "epoch": 1.73, + "learning_rate": 8.260907097498078e-06, + "loss": 1.865, + "step": 1602 + }, + { + "epoch": 1.73, + "learning_rate": 8.24915854690854e-06, + "loss": 1.5555, + "step": 1603 + }, + { + "epoch": 1.74, + "learning_rate": 8.237412488874266e-06, + "loss": 1.6192, + "step": 1604 + }, + { + "epoch": 1.74, + "learning_rate": 8.225668940117327e-06, + "loss": 1.5302, + "step": 1605 + }, + { + "epoch": 1.74, + "learning_rate": 8.213927917356228e-06, + "loss": 1.6969, + "step": 1606 + }, + { + "epoch": 1.74, + "learning_rate": 8.202189437305874e-06, + "loss": 1.8516, + "step": 1607 + }, + { + "epoch": 1.74, + "learning_rate": 8.190453516677545e-06, + "loss": 1.568, + "step": 1608 + }, + { + "epoch": 1.74, + "learning_rate": 8.178720172178886e-06, + "loss": 1.7605, + "step": 1609 + }, + { + "epoch": 1.74, + "learning_rate": 8.166989420513867e-06, + "loss": 1.2721, + "step": 1610 + }, + { + "epoch": 1.74, + "learning_rate": 8.155261278382773e-06, + "loss": 1.5424, + "step": 1611 + }, + { + "epoch": 1.74, + "learning_rate": 8.143535762482176e-06, + "loss": 1.534, + "step": 1612 + }, + { + "epoch": 1.75, + "learning_rate": 8.131812889504895e-06, + "loss": 1.3295, + "step": 1613 + }, + { + "epoch": 1.75, + "learning_rate": 8.120092676140007e-06, + "loss": 1.4227, + "step": 1614 + }, + { + "epoch": 1.75, + "learning_rate": 8.108375139072781e-06, + "loss": 1.6211, + "step": 1615 + }, + { + "epoch": 1.75, + "learning_rate": 8.096660294984693e-06, + "loss": 1.6758, + "step": 1616 + }, + { + "epoch": 1.75, + "learning_rate": 8.084948160553375e-06, + "loss": 1.7365, + "step": 1617 + }, + { + "epoch": 1.75, + "learning_rate": 8.07323875245261e-06, + "loss": 1.7047, + "step": 1618 + }, + { + "epoch": 1.75, + "learning_rate": 8.061532087352295e-06, + "loss": 1.3846, + "step": 1619 + }, + { + "epoch": 1.75, + "learning_rate": 8.049828181918417e-06, + "loss": 1.5463, + "step": 1620 + }, + { + "epoch": 1.75, + "learning_rate": 8.038127052813044e-06, + "loss": 1.5289, + "step": 1621 + }, + { + "epoch": 1.76, + "learning_rate": 8.026428716694284e-06, + "loss": 1.6241, + "step": 1622 + }, + { + "epoch": 1.76, + "learning_rate": 8.014733190216275e-06, + "loss": 1.7027, + "step": 1623 + }, + { + "epoch": 1.76, + "learning_rate": 8.003040490029145e-06, + "loss": 1.5098, + "step": 1624 + }, + { + "epoch": 1.76, + "learning_rate": 7.991350632779012e-06, + "loss": 1.5048, + "step": 1625 + }, + { + "epoch": 1.76, + "learning_rate": 7.979663635107941e-06, + "loss": 1.521, + "step": 1626 + }, + { + "epoch": 1.76, + "learning_rate": 7.967979513653917e-06, + "loss": 1.6509, + "step": 1627 + }, + { + "epoch": 1.76, + "learning_rate": 7.956298285050842e-06, + "loss": 1.6593, + "step": 1628 + }, + { + "epoch": 1.76, + "learning_rate": 7.944619965928498e-06, + "loss": 1.6461, + "step": 1629 + }, + { + "epoch": 1.76, + "learning_rate": 7.932944572912523e-06, + "loss": 1.6019, + "step": 1630 + }, + { + "epoch": 1.77, + "learning_rate": 7.921272122624384e-06, + "loss": 1.7219, + "step": 1631 + }, + { + "epoch": 1.77, + "learning_rate": 7.909602631681366e-06, + "loss": 1.4021, + "step": 1632 + }, + { + "epoch": 1.77, + "learning_rate": 7.897936116696542e-06, + "loss": 1.5456, + "step": 1633 + }, + { + "epoch": 1.77, + "learning_rate": 7.88627259427874e-06, + "loss": 1.5395, + "step": 1634 + }, + { + "epoch": 1.77, + "learning_rate": 7.87461208103254e-06, + "loss": 1.465, + "step": 1635 + }, + { + "epoch": 1.77, + "learning_rate": 7.862954593558223e-06, + "loss": 1.6116, + "step": 1636 + }, + { + "epoch": 1.77, + "learning_rate": 7.851300148451774e-06, + "loss": 1.7174, + "step": 1637 + }, + { + "epoch": 1.77, + "learning_rate": 7.839648762304841e-06, + "loss": 1.5312, + "step": 1638 + }, + { + "epoch": 1.77, + "learning_rate": 7.828000451704717e-06, + "loss": 1.6013, + "step": 1639 + }, + { + "epoch": 1.77, + "learning_rate": 7.816355233234327e-06, + "loss": 1.5673, + "step": 1640 + }, + { + "epoch": 1.78, + "learning_rate": 7.804713123472178e-06, + "loss": 1.6745, + "step": 1641 + }, + { + "epoch": 1.78, + "learning_rate": 7.793074138992364e-06, + "loss": 1.7497, + "step": 1642 + }, + { + "epoch": 1.78, + "learning_rate": 7.78143829636452e-06, + "loss": 1.6192, + "step": 1643 + }, + { + "epoch": 1.78, + "learning_rate": 7.769805612153816e-06, + "loss": 1.6124, + "step": 1644 + }, + { + "epoch": 1.78, + "learning_rate": 7.758176102920918e-06, + "loss": 1.3206, + "step": 1645 + }, + { + "epoch": 1.78, + "learning_rate": 7.746549785221982e-06, + "loss": 1.6474, + "step": 1646 + }, + { + "epoch": 1.78, + "learning_rate": 7.734926675608616e-06, + "loss": 1.4634, + "step": 1647 + }, + { + "epoch": 1.78, + "learning_rate": 7.723306790627852e-06, + "loss": 1.6819, + "step": 1648 + }, + { + "epoch": 1.78, + "learning_rate": 7.711690146822147e-06, + "loss": 1.6936, + "step": 1649 + }, + { + "epoch": 1.79, + "learning_rate": 7.700076760729328e-06, + "loss": 1.8941, + "step": 1650 + }, + { + "epoch": 1.79, + "learning_rate": 7.6884666488826e-06, + "loss": 1.381, + "step": 1651 + }, + { + "epoch": 1.79, + "learning_rate": 7.67685982781049e-06, + "loss": 1.534, + "step": 1652 + }, + { + "epoch": 1.79, + "learning_rate": 7.665256314036856e-06, + "loss": 1.6982, + "step": 1653 + }, + { + "epoch": 1.79, + "learning_rate": 7.653656124080839e-06, + "loss": 1.5934, + "step": 1654 + }, + { + "epoch": 1.79, + "learning_rate": 7.642059274456848e-06, + "loss": 1.7371, + "step": 1655 + }, + { + "epoch": 1.79, + "learning_rate": 7.630465781674538e-06, + "loss": 1.4087, + "step": 1656 + }, + { + "epoch": 1.79, + "learning_rate": 7.618875662238786e-06, + "loss": 1.612, + "step": 1657 + }, + { + "epoch": 1.79, + "learning_rate": 7.607288932649669e-06, + "loss": 1.7681, + "step": 1658 + }, + { + "epoch": 1.8, + "learning_rate": 7.595705609402427e-06, + "loss": 1.7609, + "step": 1659 + }, + { + "epoch": 1.8, + "learning_rate": 7.584125708987464e-06, + "loss": 1.6934, + "step": 1660 + }, + { + "epoch": 1.8, + "learning_rate": 7.5725492478903065e-06, + "loss": 1.4336, + "step": 1661 + }, + { + "epoch": 1.8, + "learning_rate": 7.56097624259158e-06, + "loss": 1.712, + "step": 1662 + }, + { + "epoch": 1.8, + "learning_rate": 7.549406709567001e-06, + "loss": 1.6162, + "step": 1663 + }, + { + "epoch": 1.8, + "learning_rate": 7.537840665287327e-06, + "loss": 1.5464, + "step": 1664 + }, + { + "epoch": 1.8, + "learning_rate": 7.526278126218365e-06, + "loss": 1.6153, + "step": 1665 + }, + { + "epoch": 1.8, + "learning_rate": 7.514719108820918e-06, + "loss": 1.408, + "step": 1666 + }, + { + "epoch": 1.8, + "learning_rate": 7.5031636295507845e-06, + "loss": 1.5711, + "step": 1667 + }, + { + "epoch": 1.81, + "learning_rate": 7.491611704858727e-06, + "loss": 1.4098, + "step": 1668 + }, + { + "epoch": 1.81, + "learning_rate": 7.480063351190439e-06, + "loss": 1.3499, + "step": 1669 + }, + { + "epoch": 1.81, + "learning_rate": 7.46851858498654e-06, + "loss": 1.4203, + "step": 1670 + }, + { + "epoch": 1.81, + "learning_rate": 7.456977422682532e-06, + "loss": 1.4782, + "step": 1671 + }, + { + "epoch": 1.81, + "learning_rate": 7.445439880708796e-06, + "loss": 1.5853, + "step": 1672 + }, + { + "epoch": 1.81, + "learning_rate": 7.433905975490552e-06, + "loss": 1.5873, + "step": 1673 + }, + { + "epoch": 1.81, + "learning_rate": 7.422375723447846e-06, + "loss": 1.7037, + "step": 1674 + }, + { + "epoch": 1.81, + "learning_rate": 7.410849140995528e-06, + "loss": 1.5418, + "step": 1675 + }, + { + "epoch": 1.81, + "learning_rate": 7.399326244543211e-06, + "loss": 1.5074, + "step": 1676 + }, + { + "epoch": 1.81, + "learning_rate": 7.387807050495275e-06, + "loss": 1.7242, + "step": 1677 + }, + { + "epoch": 1.82, + "learning_rate": 7.376291575250812e-06, + "loss": 1.4759, + "step": 1678 + }, + { + "epoch": 1.82, + "learning_rate": 7.3647798352036394e-06, + "loss": 1.3884, + "step": 1679 + }, + { + "epoch": 1.82, + "learning_rate": 7.3532718467422415e-06, + "loss": 1.5181, + "step": 1680 + }, + { + "epoch": 1.82, + "learning_rate": 7.34176762624977e-06, + "loss": 1.7005, + "step": 1681 + }, + { + "epoch": 1.82, + "learning_rate": 7.330267190104015e-06, + "loss": 1.431, + "step": 1682 + }, + { + "epoch": 1.82, + "learning_rate": 7.318770554677366e-06, + "loss": 1.8979, + "step": 1683 + }, + { + "epoch": 1.82, + "learning_rate": 7.307277736336815e-06, + "loss": 1.5267, + "step": 1684 + }, + { + "epoch": 1.82, + "learning_rate": 7.295788751443913e-06, + "loss": 1.5903, + "step": 1685 + }, + { + "epoch": 1.82, + "learning_rate": 7.284303616354759e-06, + "loss": 1.253, + "step": 1686 + }, + { + "epoch": 1.83, + "learning_rate": 7.27282234741996e-06, + "loss": 1.2221, + "step": 1687 + }, + { + "epoch": 1.83, + "learning_rate": 7.261344960984631e-06, + "loss": 1.7133, + "step": 1688 + }, + { + "epoch": 1.83, + "learning_rate": 7.249871473388361e-06, + "loss": 1.5898, + "step": 1689 + }, + { + "epoch": 1.83, + "learning_rate": 7.238401900965172e-06, + "loss": 1.8137, + "step": 1690 + }, + { + "epoch": 1.83, + "learning_rate": 7.226936260043531e-06, + "loss": 1.3861, + "step": 1691 + }, + { + "epoch": 1.83, + "learning_rate": 7.215474566946296e-06, + "loss": 1.7047, + "step": 1692 + }, + { + "epoch": 1.83, + "learning_rate": 7.204016837990715e-06, + "loss": 1.4074, + "step": 1693 + }, + { + "epoch": 1.83, + "learning_rate": 7.192563089488377e-06, + "loss": 1.4883, + "step": 1694 + }, + { + "epoch": 1.83, + "learning_rate": 7.181113337745218e-06, + "loss": 1.5436, + "step": 1695 + }, + { + "epoch": 1.84, + "learning_rate": 7.169667599061484e-06, + "loss": 1.3418, + "step": 1696 + }, + { + "epoch": 1.84, + "learning_rate": 7.158225889731698e-06, + "loss": 1.5738, + "step": 1697 + }, + { + "epoch": 1.84, + "learning_rate": 7.146788226044658e-06, + "loss": 1.6208, + "step": 1698 + }, + { + "epoch": 1.84, + "learning_rate": 7.1353546242833885e-06, + "loss": 1.2694, + "step": 1699 + }, + { + "epoch": 1.84, + "learning_rate": 7.12392510072515e-06, + "loss": 1.5973, + "step": 1700 + }, + { + "epoch": 1.84, + "learning_rate": 7.112499671641375e-06, + "loss": 1.6895, + "step": 1701 + }, + { + "epoch": 1.84, + "learning_rate": 7.1010783532976866e-06, + "loss": 1.3201, + "step": 1702 + }, + { + "epoch": 1.84, + "learning_rate": 7.089661161953847e-06, + "loss": 1.6054, + "step": 1703 + }, + { + "epoch": 1.84, + "learning_rate": 7.078248113863741e-06, + "loss": 1.6929, + "step": 1704 + }, + { + "epoch": 1.85, + "learning_rate": 7.066839225275366e-06, + "loss": 1.347, + "step": 1705 + }, + { + "epoch": 1.85, + "learning_rate": 7.0554345124307765e-06, + "loss": 1.6863, + "step": 1706 + }, + { + "epoch": 1.85, + "learning_rate": 7.044033991566106e-06, + "loss": 1.659, + "step": 1707 + }, + { + "epoch": 1.85, + "learning_rate": 7.032637678911504e-06, + "loss": 1.6838, + "step": 1708 + }, + { + "epoch": 1.85, + "learning_rate": 7.021245590691136e-06, + "loss": 1.4575, + "step": 1709 + }, + { + "epoch": 1.85, + "learning_rate": 7.009857743123156e-06, + "loss": 1.5452, + "step": 1710 + }, + { + "epoch": 1.85, + "learning_rate": 6.998474152419672e-06, + "loss": 1.5975, + "step": 1711 + }, + { + "epoch": 1.85, + "learning_rate": 6.987094834786739e-06, + "loss": 1.6389, + "step": 1712 + }, + { + "epoch": 1.85, + "learning_rate": 6.975719806424324e-06, + "loss": 1.7488, + "step": 1713 + }, + { + "epoch": 1.85, + "learning_rate": 6.964349083526295e-06, + "loss": 1.6343, + "step": 1714 + }, + { + "epoch": 1.86, + "learning_rate": 6.95298268228038e-06, + "loss": 1.8227, + "step": 1715 + }, + { + "epoch": 1.86, + "learning_rate": 6.941620618868161e-06, + "loss": 1.8224, + "step": 1716 + }, + { + "epoch": 1.86, + "learning_rate": 6.930262909465051e-06, + "loss": 1.1914, + "step": 1717 + }, + { + "epoch": 1.86, + "learning_rate": 6.918909570240249e-06, + "loss": 1.7999, + "step": 1718 + }, + { + "epoch": 1.86, + "learning_rate": 6.9075606173567435e-06, + "loss": 1.3275, + "step": 1719 + }, + { + "epoch": 1.86, + "learning_rate": 6.8962160669712755e-06, + "loss": 1.6261, + "step": 1720 + }, + { + "epoch": 1.86, + "learning_rate": 6.884875935234323e-06, + "loss": 1.3259, + "step": 1721 + }, + { + "epoch": 1.86, + "learning_rate": 6.8735402382900605e-06, + "loss": 1.5051, + "step": 1722 + }, + { + "epoch": 1.86, + "learning_rate": 6.862208992276362e-06, + "loss": 1.447, + "step": 1723 + }, + { + "epoch": 1.87, + "learning_rate": 6.850882213324766e-06, + "loss": 1.863, + "step": 1724 + }, + { + "epoch": 1.87, + "learning_rate": 6.839559917560437e-06, + "loss": 1.5845, + "step": 1725 + }, + { + "epoch": 1.87, + "learning_rate": 6.828242121102173e-06, + "loss": 1.5892, + "step": 1726 + }, + { + "epoch": 1.87, + "learning_rate": 6.8169288400623555e-06, + "loss": 1.5102, + "step": 1727 + }, + { + "epoch": 1.87, + "learning_rate": 6.805620090546947e-06, + "loss": 1.3536, + "step": 1728 + }, + { + "epoch": 1.87, + "learning_rate": 6.794315888655446e-06, + "loss": 1.7994, + "step": 1729 + }, + { + "epoch": 1.87, + "learning_rate": 6.783016250480891e-06, + "loss": 1.5345, + "step": 1730 + }, + { + "epoch": 1.87, + "learning_rate": 6.771721192109817e-06, + "loss": 1.7666, + "step": 1731 + }, + { + "epoch": 1.87, + "learning_rate": 6.760430729622236e-06, + "loss": 1.5554, + "step": 1732 + }, + { + "epoch": 1.88, + "learning_rate": 6.749144879091626e-06, + "loss": 1.6225, + "step": 1733 + }, + { + "epoch": 1.88, + "learning_rate": 6.737863656584886e-06, + "loss": 1.8757, + "step": 1734 + }, + { + "epoch": 1.88, + "learning_rate": 6.72658707816234e-06, + "loss": 1.6269, + "step": 1735 + }, + { + "epoch": 1.88, + "learning_rate": 6.715315159877691e-06, + "loss": 1.7373, + "step": 1736 + }, + { + "epoch": 1.88, + "learning_rate": 6.704047917778011e-06, + "loss": 1.5422, + "step": 1737 + }, + { + "epoch": 1.88, + "learning_rate": 6.692785367903723e-06, + "loss": 1.4293, + "step": 1738 + }, + { + "epoch": 1.88, + "learning_rate": 6.681527526288549e-06, + "loss": 1.6787, + "step": 1739 + }, + { + "epoch": 1.88, + "learning_rate": 6.670274408959535e-06, + "loss": 1.5579, + "step": 1740 + }, + { + "epoch": 1.88, + "learning_rate": 6.6590260319369745e-06, + "loss": 1.5948, + "step": 1741 + }, + { + "epoch": 1.89, + "learning_rate": 6.647782411234436e-06, + "loss": 1.419, + "step": 1742 + }, + { + "epoch": 1.89, + "learning_rate": 6.636543562858697e-06, + "loss": 1.7773, + "step": 1743 + }, + { + "epoch": 1.89, + "learning_rate": 6.625309502809758e-06, + "loss": 1.464, + "step": 1744 + }, + { + "epoch": 1.89, + "learning_rate": 6.614080247080794e-06, + "loss": 1.4038, + "step": 1745 + }, + { + "epoch": 1.89, + "learning_rate": 6.602855811658138e-06, + "loss": 1.5296, + "step": 1746 + }, + { + "epoch": 1.89, + "learning_rate": 6.591636212521266e-06, + "loss": 1.5991, + "step": 1747 + }, + { + "epoch": 1.89, + "learning_rate": 6.580421465642767e-06, + "loss": 1.5339, + "step": 1748 + }, + { + "epoch": 1.89, + "learning_rate": 6.569211586988324e-06, + "loss": 1.6199, + "step": 1749 + }, + { + "epoch": 1.89, + "learning_rate": 6.558006592516683e-06, + "loss": 1.3909, + "step": 1750 + }, + { + "epoch": 1.9, + "learning_rate": 6.546806498179643e-06, + "loss": 1.5212, + "step": 1751 + }, + { + "epoch": 1.9, + "learning_rate": 6.535611319922031e-06, + "loss": 1.6971, + "step": 1752 + }, + { + "epoch": 1.9, + "learning_rate": 6.524421073681659e-06, + "loss": 1.411, + "step": 1753 + }, + { + "epoch": 1.9, + "learning_rate": 6.513235775389338e-06, + "loss": 1.5912, + "step": 1754 + }, + { + "epoch": 1.9, + "learning_rate": 6.502055440968819e-06, + "loss": 1.4404, + "step": 1755 + }, + { + "epoch": 1.9, + "learning_rate": 6.490880086336797e-06, + "loss": 1.4707, + "step": 1756 + }, + { + "epoch": 1.9, + "learning_rate": 6.479709727402868e-06, + "loss": 1.4696, + "step": 1757 + }, + { + "epoch": 1.9, + "learning_rate": 6.468544380069523e-06, + "loss": 1.7753, + "step": 1758 + }, + { + "epoch": 1.9, + "learning_rate": 6.457384060232119e-06, + "loss": 1.6027, + "step": 1759 + }, + { + "epoch": 1.9, + "learning_rate": 6.4462287837788495e-06, + "loss": 1.5656, + "step": 1760 + }, + { + "epoch": 1.91, + "learning_rate": 6.435078566590738e-06, + "loss": 1.4584, + "step": 1761 + }, + { + "epoch": 1.91, + "learning_rate": 6.423933424541588e-06, + "loss": 1.3464, + "step": 1762 + }, + { + "epoch": 1.91, + "learning_rate": 6.412793373497999e-06, + "loss": 1.6881, + "step": 1763 + }, + { + "epoch": 1.91, + "learning_rate": 6.401658429319305e-06, + "loss": 1.6128, + "step": 1764 + }, + { + "epoch": 1.91, + "learning_rate": 6.390528607857582e-06, + "loss": 1.818, + "step": 1765 + }, + { + "epoch": 1.91, + "learning_rate": 6.379403924957612e-06, + "loss": 1.859, + "step": 1766 + }, + { + "epoch": 1.91, + "learning_rate": 6.36828439645685e-06, + "loss": 1.5989, + "step": 1767 + }, + { + "epoch": 1.91, + "learning_rate": 6.357170038185428e-06, + "loss": 1.6319, + "step": 1768 + }, + { + "epoch": 1.91, + "learning_rate": 6.346060865966104e-06, + "loss": 1.4426, + "step": 1769 + }, + { + "epoch": 1.92, + "learning_rate": 6.3349568956142636e-06, + "loss": 1.8158, + "step": 1770 + }, + { + "epoch": 1.92, + "learning_rate": 6.3238581429378846e-06, + "loss": 1.7956, + "step": 1771 + }, + { + "epoch": 1.92, + "learning_rate": 6.312764623737511e-06, + "loss": 1.5887, + "step": 1772 + }, + { + "epoch": 1.92, + "learning_rate": 6.301676353806245e-06, + "loss": 1.4651, + "step": 1773 + }, + { + "epoch": 1.92, + "learning_rate": 6.2905933489297055e-06, + "loss": 1.4191, + "step": 1774 + }, + { + "epoch": 1.92, + "learning_rate": 6.279515624886024e-06, + "loss": 1.4645, + "step": 1775 + }, + { + "epoch": 1.92, + "learning_rate": 6.26844319744581e-06, + "loss": 1.6481, + "step": 1776 + }, + { + "epoch": 1.92, + "learning_rate": 6.257376082372138e-06, + "loss": 1.5659, + "step": 1777 + }, + { + "epoch": 1.92, + "learning_rate": 6.246314295420515e-06, + "loss": 1.3886, + "step": 1778 + }, + { + "epoch": 1.93, + "learning_rate": 6.235257852338857e-06, + "loss": 1.4936, + "step": 1779 + }, + { + "epoch": 1.93, + "learning_rate": 6.2242067688674876e-06, + "loss": 1.5716, + "step": 1780 + }, + { + "epoch": 1.93, + "learning_rate": 6.2131610607390835e-06, + "loss": 1.5199, + "step": 1781 + }, + { + "epoch": 1.93, + "learning_rate": 6.202120743678682e-06, + "loss": 1.5851, + "step": 1782 + }, + { + "epoch": 1.93, + "learning_rate": 6.191085833403636e-06, + "loss": 1.567, + "step": 1783 + }, + { + "epoch": 1.93, + "learning_rate": 6.180056345623608e-06, + "loss": 1.8849, + "step": 1784 + }, + { + "epoch": 1.93, + "learning_rate": 6.169032296040542e-06, + "loss": 1.4365, + "step": 1785 + }, + { + "epoch": 1.93, + "learning_rate": 6.158013700348628e-06, + "loss": 1.6608, + "step": 1786 + }, + { + "epoch": 1.93, + "learning_rate": 6.1470005742343075e-06, + "loss": 1.5282, + "step": 1787 + }, + { + "epoch": 1.94, + "learning_rate": 6.1359929333762206e-06, + "loss": 1.8344, + "step": 1788 + }, + { + "epoch": 1.94, + "learning_rate": 6.124990793445214e-06, + "loss": 1.7696, + "step": 1789 + }, + { + "epoch": 1.94, + "learning_rate": 6.113994170104285e-06, + "loss": 1.4385, + "step": 1790 + }, + { + "epoch": 1.94, + "learning_rate": 6.103003079008593e-06, + "loss": 1.5002, + "step": 1791 + }, + { + "epoch": 1.94, + "learning_rate": 6.0920175358054166e-06, + "loss": 1.7646, + "step": 1792 + }, + { + "epoch": 1.94, + "learning_rate": 6.08103755613413e-06, + "loss": 1.5009, + "step": 1793 + }, + { + "epoch": 1.94, + "learning_rate": 6.070063155626197e-06, + "loss": 1.6243, + "step": 1794 + }, + { + "epoch": 1.94, + "learning_rate": 6.059094349905128e-06, + "loss": 1.5057, + "step": 1795 + }, + { + "epoch": 1.94, + "learning_rate": 6.048131154586483e-06, + "loss": 1.3089, + "step": 1796 + }, + { + "epoch": 1.94, + "learning_rate": 6.037173585277816e-06, + "loss": 1.7252, + "step": 1797 + }, + { + "epoch": 1.95, + "learning_rate": 6.026221657578688e-06, + "loss": 1.8852, + "step": 1798 + }, + { + "epoch": 1.95, + "learning_rate": 6.015275387080621e-06, + "loss": 1.7413, + "step": 1799 + }, + { + "epoch": 1.95, + "learning_rate": 6.004334789367083e-06, + "loss": 1.6771, + "step": 1800 + }, + { + "epoch": 1.95, + "learning_rate": 5.9933998800134726e-06, + "loss": 1.5189, + "step": 1801 + }, + { + "epoch": 1.95, + "learning_rate": 5.982470674587078e-06, + "loss": 1.631, + "step": 1802 + }, + { + "epoch": 1.95, + "learning_rate": 5.971547188647078e-06, + "loss": 1.4991, + "step": 1803 + }, + { + "epoch": 1.95, + "learning_rate": 5.9606294377445006e-06, + "loss": 1.5662, + "step": 1804 + }, + { + "epoch": 1.95, + "learning_rate": 5.949717437422222e-06, + "loss": 1.6073, + "step": 1805 + }, + { + "epoch": 1.95, + "learning_rate": 5.938811203214918e-06, + "loss": 1.8055, + "step": 1806 + }, + { + "epoch": 1.96, + "learning_rate": 5.92791075064906e-06, + "loss": 1.7905, + "step": 1807 + }, + { + "epoch": 1.96, + "learning_rate": 5.917016095242893e-06, + "loss": 1.5288, + "step": 1808 + }, + { + "epoch": 1.96, + "learning_rate": 5.9061272525064015e-06, + "loss": 1.9134, + "step": 1809 + }, + { + "epoch": 1.96, + "learning_rate": 5.8952442379413045e-06, + "loss": 1.551, + "step": 1810 + }, + { + "epoch": 1.96, + "learning_rate": 5.88436706704101e-06, + "loss": 1.7406, + "step": 1811 + }, + { + "epoch": 1.96, + "learning_rate": 5.873495755290621e-06, + "loss": 1.7211, + "step": 1812 + }, + { + "epoch": 1.96, + "learning_rate": 5.862630318166896e-06, + "loss": 1.9039, + "step": 1813 + }, + { + "epoch": 1.96, + "learning_rate": 5.851770771138218e-06, + "loss": 1.1796, + "step": 1814 + }, + { + "epoch": 1.96, + "learning_rate": 5.840917129664602e-06, + "loss": 1.676, + "step": 1815 + }, + { + "epoch": 1.97, + "learning_rate": 5.830069409197645e-06, + "loss": 1.4525, + "step": 1816 + }, + { + "epoch": 1.97, + "learning_rate": 5.819227625180517e-06, + "loss": 1.6511, + "step": 1817 + }, + { + "epoch": 1.97, + "learning_rate": 5.8083917930479365e-06, + "loss": 1.8143, + "step": 1818 + }, + { + "epoch": 1.97, + "learning_rate": 5.797561928226145e-06, + "loss": 1.331, + "step": 1819 + }, + { + "epoch": 1.97, + "learning_rate": 5.786738046132902e-06, + "loss": 1.7415, + "step": 1820 + }, + { + "epoch": 1.97, + "learning_rate": 5.775920162177437e-06, + "loss": 1.6579, + "step": 1821 + }, + { + "epoch": 1.97, + "learning_rate": 5.765108291760443e-06, + "loss": 1.4423, + "step": 1822 + }, + { + "epoch": 1.97, + "learning_rate": 5.754302450274053e-06, + "loss": 1.343, + "step": 1823 + }, + { + "epoch": 1.97, + "learning_rate": 5.743502653101818e-06, + "loss": 1.4121, + "step": 1824 + }, + { + "epoch": 1.98, + "learning_rate": 5.732708915618679e-06, + "loss": 1.5275, + "step": 1825 + }, + { + "epoch": 1.98, + "learning_rate": 5.72192125319096e-06, + "loss": 1.8203, + "step": 1826 + }, + { + "epoch": 1.98, + "learning_rate": 5.711139681176332e-06, + "loss": 1.4243, + "step": 1827 + }, + { + "epoch": 1.98, + "learning_rate": 5.700364214923788e-06, + "loss": 1.4667, + "step": 1828 + }, + { + "epoch": 1.98, + "learning_rate": 5.689594869773639e-06, + "loss": 1.7091, + "step": 1829 + }, + { + "epoch": 1.98, + "learning_rate": 5.678831661057473e-06, + "loss": 1.4341, + "step": 1830 + }, + { + "epoch": 1.98, + "learning_rate": 5.668074604098157e-06, + "loss": 1.7594, + "step": 1831 + }, + { + "epoch": 1.98, + "learning_rate": 5.657323714209775e-06, + "loss": 1.7115, + "step": 1832 + }, + { + "epoch": 1.98, + "learning_rate": 5.646579006697659e-06, + "loss": 1.6253, + "step": 1833 + }, + { + "epoch": 1.98, + "learning_rate": 5.63584049685832e-06, + "loss": 2.0217, + "step": 1834 + }, + { + "epoch": 1.99, + "learning_rate": 5.625108199979454e-06, + "loss": 1.6056, + "step": 1835 + }, + { + "epoch": 1.99, + "learning_rate": 5.614382131339911e-06, + "loss": 1.8108, + "step": 1836 + }, + { + "epoch": 1.99, + "learning_rate": 5.603662306209671e-06, + "loss": 1.7316, + "step": 1837 + }, + { + "epoch": 1.99, + "learning_rate": 5.592948739849838e-06, + "loss": 1.6777, + "step": 1838 + }, + { + "epoch": 1.99, + "learning_rate": 5.58224144751258e-06, + "loss": 1.7195, + "step": 1839 + }, + { + "epoch": 1.99, + "learning_rate": 5.5715404444411615e-06, + "loss": 1.2523, + "step": 1840 + }, + { + "epoch": 1.99, + "learning_rate": 5.560845745869879e-06, + "loss": 1.5651, + "step": 1841 + }, + { + "epoch": 1.99, + "learning_rate": 5.55015736702405e-06, + "loss": 1.7505, + "step": 1842 + }, + { + "epoch": 1.99, + "learning_rate": 5.539475323120014e-06, + "loss": 1.5079, + "step": 1843 + }, + { + "epoch": 2.0, + "learning_rate": 5.528799629365062e-06, + "loss": 1.4314, + "step": 1844 + }, + { + "epoch": 2.0, + "learning_rate": 5.518130300957476e-06, + "loss": 1.6132, + "step": 1845 + }, + { + "epoch": 2.0, + "learning_rate": 5.507467353086449e-06, + "loss": 1.3751, + "step": 1846 + }, + { + "epoch": 2.0, + "learning_rate": 5.496810800932113e-06, + "loss": 1.6673, + "step": 1847 + }, + { + "epoch": 2.0, + "learning_rate": 5.4861606596654805e-06, + "loss": 1.2954, + "step": 1848 + }, + { + "epoch": 2.0, + "learning_rate": 5.475516944448437e-06, + "loss": 1.4659, + "step": 1849 + }, + { + "epoch": 2.0, + "learning_rate": 5.464879670433738e-06, + "loss": 1.3197, + "step": 1850 + }, + { + "epoch": 2.0, + "learning_rate": 5.4542488527649385e-06, + "loss": 1.5441, + "step": 1851 + }, + { + "epoch": 2.0, + "learning_rate": 5.443624506576433e-06, + "loss": 1.3866, + "step": 1852 + }, + { + "epoch": 2.01, + "learning_rate": 5.433006646993379e-06, + "loss": 1.5287, + "step": 1853 + }, + { + "epoch": 2.01, + "learning_rate": 5.422395289131712e-06, + "loss": 1.4827, + "step": 1854 + }, + { + "epoch": 2.01, + "learning_rate": 5.411790448098117e-06, + "loss": 1.4118, + "step": 1855 + }, + { + "epoch": 2.01, + "learning_rate": 5.401192138989979e-06, + "loss": 1.399, + "step": 1856 + }, + { + "epoch": 2.01, + "learning_rate": 5.390600376895413e-06, + "loss": 1.5438, + "step": 1857 + }, + { + "epoch": 2.01, + "learning_rate": 5.380015176893183e-06, + "loss": 1.5333, + "step": 1858 + }, + { + "epoch": 2.01, + "learning_rate": 5.369436554052738e-06, + "loss": 1.2251, + "step": 1859 + }, + { + "epoch": 2.01, + "learning_rate": 5.358864523434148e-06, + "loss": 1.6184, + "step": 1860 + }, + { + "epoch": 2.01, + "learning_rate": 5.348299100088101e-06, + "loss": 1.3506, + "step": 1861 + }, + { + "epoch": 2.02, + "learning_rate": 5.3377402990558876e-06, + "loss": 1.478, + "step": 1862 + }, + { + "epoch": 2.02, + "learning_rate": 5.32718813536935e-06, + "loss": 1.1999, + "step": 1863 + }, + { + "epoch": 2.02, + "learning_rate": 5.316642624050905e-06, + "loss": 1.5515, + "step": 1864 + }, + { + "epoch": 2.02, + "learning_rate": 5.306103780113484e-06, + "loss": 1.4353, + "step": 1865 + }, + { + "epoch": 2.02, + "learning_rate": 5.295571618560531e-06, + "loss": 1.7013, + "step": 1866 + }, + { + "epoch": 2.02, + "learning_rate": 5.285046154385976e-06, + "loss": 1.6689, + "step": 1867 + }, + { + "epoch": 2.02, + "learning_rate": 5.274527402574212e-06, + "loss": 1.5817, + "step": 1868 + }, + { + "epoch": 2.02, + "learning_rate": 5.264015378100088e-06, + "loss": 1.3292, + "step": 1869 + }, + { + "epoch": 2.02, + "learning_rate": 5.2535100959288534e-06, + "loss": 1.6756, + "step": 1870 + }, + { + "epoch": 2.02, + "learning_rate": 5.243011571016181e-06, + "loss": 1.7555, + "step": 1871 + }, + { + "epoch": 2.03, + "learning_rate": 5.232519818308115e-06, + "loss": 1.4063, + "step": 1872 + }, + { + "epoch": 2.03, + "learning_rate": 5.222034852741054e-06, + "loss": 1.3522, + "step": 1873 + }, + { + "epoch": 2.03, + "learning_rate": 5.211556689241742e-06, + "loss": 1.4141, + "step": 1874 + }, + { + "epoch": 2.03, + "learning_rate": 5.20108534272723e-06, + "loss": 1.3918, + "step": 1875 + }, + { + "epoch": 2.03, + "learning_rate": 5.190620828104878e-06, + "loss": 1.4967, + "step": 1876 + }, + { + "epoch": 2.03, + "learning_rate": 5.180163160272309e-06, + "loss": 1.2588, + "step": 1877 + }, + { + "epoch": 2.03, + "learning_rate": 5.1697123541174025e-06, + "loss": 1.5597, + "step": 1878 + }, + { + "epoch": 2.03, + "learning_rate": 5.159268424518264e-06, + "loss": 1.2347, + "step": 1879 + }, + { + "epoch": 2.03, + "learning_rate": 5.14883138634322e-06, + "loss": 1.5946, + "step": 1880 + }, + { + "epoch": 2.04, + "learning_rate": 5.138401254450773e-06, + "loss": 1.51, + "step": 1881 + }, + { + "epoch": 2.04, + "learning_rate": 5.127978043689607e-06, + "loss": 1.4591, + "step": 1882 + }, + { + "epoch": 2.04, + "learning_rate": 5.1175617688985446e-06, + "loss": 1.5665, + "step": 1883 + }, + { + "epoch": 2.04, + "learning_rate": 5.107152444906536e-06, + "loss": 1.2853, + "step": 1884 + }, + { + "epoch": 2.04, + "learning_rate": 5.0967500865326334e-06, + "loss": 1.6581, + "step": 1885 + }, + { + "epoch": 2.04, + "learning_rate": 5.086354708585979e-06, + "loss": 1.6065, + "step": 1886 + }, + { + "epoch": 2.04, + "learning_rate": 5.07596632586577e-06, + "loss": 1.4056, + "step": 1887 + }, + { + "epoch": 2.04, + "learning_rate": 5.0655849531612465e-06, + "loss": 1.4406, + "step": 1888 + }, + { + "epoch": 2.04, + "learning_rate": 5.055210605251678e-06, + "loss": 1.2587, + "step": 1889 + }, + { + "epoch": 2.05, + "learning_rate": 5.0448432969063235e-06, + "loss": 1.4093, + "step": 1890 + }, + { + "epoch": 2.05, + "learning_rate": 5.034483042884419e-06, + "loss": 1.601, + "step": 1891 + }, + { + "epoch": 2.05, + "learning_rate": 5.024129857935167e-06, + "loss": 1.3906, + "step": 1892 + }, + { + "epoch": 2.05, + "learning_rate": 5.0137837567976926e-06, + "loss": 1.2058, + "step": 1893 + }, + { + "epoch": 2.05, + "learning_rate": 5.003444754201059e-06, + "loss": 1.372, + "step": 1894 + }, + { + "epoch": 2.05, + "learning_rate": 4.993112864864191e-06, + "loss": 1.65, + "step": 1895 + }, + { + "epoch": 2.05, + "learning_rate": 4.982788103495918e-06, + "loss": 1.2757, + "step": 1896 + }, + { + "epoch": 2.05, + "learning_rate": 4.972470484794906e-06, + "loss": 1.3368, + "step": 1897 + }, + { + "epoch": 2.05, + "learning_rate": 4.962160023449652e-06, + "loss": 1.4221, + "step": 1898 + }, + { + "epoch": 2.06, + "learning_rate": 4.951856734138469e-06, + "loss": 1.3768, + "step": 1899 + }, + { + "epoch": 2.06, + "learning_rate": 4.941560631529452e-06, + "loss": 1.1427, + "step": 1900 + }, + { + "epoch": 2.06, + "learning_rate": 4.931271730280482e-06, + "loss": 1.3772, + "step": 1901 + }, + { + "epoch": 2.06, + "learning_rate": 4.92099004503916e-06, + "loss": 1.3651, + "step": 1902 + }, + { + "epoch": 2.06, + "learning_rate": 4.9107155904428435e-06, + "loss": 1.3227, + "step": 1903 + }, + { + "epoch": 2.06, + "learning_rate": 4.900448381118578e-06, + "loss": 1.3926, + "step": 1904 + }, + { + "epoch": 2.06, + "learning_rate": 4.890188431683095e-06, + "loss": 1.4526, + "step": 1905 + }, + { + "epoch": 2.06, + "learning_rate": 4.879935756742805e-06, + "loss": 1.2755, + "step": 1906 + }, + { + "epoch": 2.06, + "learning_rate": 4.869690370893739e-06, + "loss": 1.4351, + "step": 1907 + }, + { + "epoch": 2.06, + "learning_rate": 4.8594522887215776e-06, + "loss": 1.5472, + "step": 1908 + }, + { + "epoch": 2.07, + "learning_rate": 4.849221524801576e-06, + "loss": 1.5711, + "step": 1909 + }, + { + "epoch": 2.07, + "learning_rate": 4.838998093698596e-06, + "loss": 1.3841, + "step": 1910 + }, + { + "epoch": 2.07, + "learning_rate": 4.828782009967044e-06, + "loss": 1.2828, + "step": 1911 + }, + { + "epoch": 2.07, + "learning_rate": 4.81857328815087e-06, + "loss": 1.5097, + "step": 1912 + }, + { + "epoch": 2.07, + "learning_rate": 4.808371942783554e-06, + "loss": 1.1843, + "step": 1913 + }, + { + "epoch": 2.07, + "learning_rate": 4.798177988388052e-06, + "loss": 1.3296, + "step": 1914 + }, + { + "epoch": 2.07, + "learning_rate": 4.7879914394768225e-06, + "loss": 1.4215, + "step": 1915 + }, + { + "epoch": 2.07, + "learning_rate": 4.777812310551767e-06, + "loss": 1.4029, + "step": 1916 + }, + { + "epoch": 2.07, + "learning_rate": 4.767640616104222e-06, + "loss": 1.6383, + "step": 1917 + }, + { + "epoch": 2.08, + "learning_rate": 4.757476370614957e-06, + "loss": 1.2297, + "step": 1918 + }, + { + "epoch": 2.08, + "learning_rate": 4.74731958855411e-06, + "loss": 1.2133, + "step": 1919 + }, + { + "epoch": 2.08, + "learning_rate": 4.737170284381224e-06, + "loss": 1.3092, + "step": 1920 + }, + { + "epoch": 2.08, + "learning_rate": 4.727028472545165e-06, + "loss": 1.4945, + "step": 1921 + }, + { + "epoch": 2.08, + "learning_rate": 4.716894167484161e-06, + "loss": 1.5155, + "step": 1922 + }, + { + "epoch": 2.08, + "learning_rate": 4.706767383625737e-06, + "loss": 1.4136, + "step": 1923 + }, + { + "epoch": 2.08, + "learning_rate": 4.6966481353867085e-06, + "loss": 1.7102, + "step": 1924 + }, + { + "epoch": 2.08, + "learning_rate": 4.6865364371731825e-06, + "loss": 1.2092, + "step": 1925 + }, + { + "epoch": 2.08, + "learning_rate": 4.6764323033804895e-06, + "loss": 1.5983, + "step": 1926 + }, + { + "epoch": 2.09, + "learning_rate": 4.666335748393214e-06, + "loss": 1.3997, + "step": 1927 + }, + { + "epoch": 2.09, + "learning_rate": 4.65624678658514e-06, + "loss": 1.6628, + "step": 1928 + }, + { + "epoch": 2.09, + "learning_rate": 4.646165432319242e-06, + "loss": 1.5539, + "step": 1929 + }, + { + "epoch": 2.09, + "learning_rate": 4.636091699947675e-06, + "loss": 1.3694, + "step": 1930 + }, + { + "epoch": 2.09, + "learning_rate": 4.62602560381172e-06, + "loss": 1.4263, + "step": 1931 + }, + { + "epoch": 2.09, + "learning_rate": 4.615967158241814e-06, + "loss": 1.5073, + "step": 1932 + }, + { + "epoch": 2.09, + "learning_rate": 4.6059163775574856e-06, + "loss": 1.4607, + "step": 1933 + }, + { + "epoch": 2.09, + "learning_rate": 4.595873276067354e-06, + "loss": 1.3914, + "step": 1934 + }, + { + "epoch": 2.09, + "learning_rate": 4.5858378680691085e-06, + "loss": 1.6595, + "step": 1935 + }, + { + "epoch": 2.1, + "learning_rate": 4.575810167849481e-06, + "loss": 1.4194, + "step": 1936 + }, + { + "epoch": 2.1, + "learning_rate": 4.565790189684247e-06, + "loss": 1.5239, + "step": 1937 + }, + { + "epoch": 2.1, + "learning_rate": 4.555777947838158e-06, + "loss": 1.4987, + "step": 1938 + }, + { + "epoch": 2.1, + "learning_rate": 4.545773456564983e-06, + "loss": 1.5604, + "step": 1939 + }, + { + "epoch": 2.1, + "learning_rate": 4.535776730107438e-06, + "loss": 1.4635, + "step": 1940 + }, + { + "epoch": 2.1, + "learning_rate": 4.525787782697191e-06, + "loss": 1.3534, + "step": 1941 + }, + { + "epoch": 2.1, + "learning_rate": 4.515806628554835e-06, + "loss": 1.3225, + "step": 1942 + }, + { + "epoch": 2.1, + "learning_rate": 4.505833281889866e-06, + "loss": 1.2723, + "step": 1943 + }, + { + "epoch": 2.1, + "learning_rate": 4.495867756900671e-06, + "loss": 1.3468, + "step": 1944 + }, + { + "epoch": 2.1, + "learning_rate": 4.485910067774498e-06, + "loss": 1.0302, + "step": 1945 + }, + { + "epoch": 2.11, + "learning_rate": 4.475960228687437e-06, + "loss": 1.5602, + "step": 1946 + }, + { + "epoch": 2.11, + "learning_rate": 4.466018253804405e-06, + "loss": 1.3989, + "step": 1947 + }, + { + "epoch": 2.11, + "learning_rate": 4.456084157279125e-06, + "loss": 1.2241, + "step": 1948 + }, + { + "epoch": 2.11, + "learning_rate": 4.446157953254101e-06, + "loss": 1.2904, + "step": 1949 + }, + { + "epoch": 2.11, + "learning_rate": 4.436239655860599e-06, + "loss": 1.3973, + "step": 1950 + }, + { + "epoch": 2.11, + "learning_rate": 4.42632927921864e-06, + "loss": 1.4488, + "step": 1951 + }, + { + "epoch": 2.11, + "learning_rate": 4.416426837436956e-06, + "loss": 1.5007, + "step": 1952 + }, + { + "epoch": 2.11, + "learning_rate": 4.406532344612988e-06, + "loss": 1.4288, + "step": 1953 + }, + { + "epoch": 2.11, + "learning_rate": 4.39664581483286e-06, + "loss": 1.358, + "step": 1954 + }, + { + "epoch": 2.12, + "learning_rate": 4.386767262171362e-06, + "loss": 1.7036, + "step": 1955 + }, + { + "epoch": 2.12, + "learning_rate": 4.376896700691919e-06, + "loss": 1.2204, + "step": 1956 + }, + { + "epoch": 2.12, + "learning_rate": 4.367034144446594e-06, + "loss": 1.4717, + "step": 1957 + }, + { + "epoch": 2.12, + "learning_rate": 4.357179607476042e-06, + "loss": 1.4448, + "step": 1958 + }, + { + "epoch": 2.12, + "learning_rate": 4.347333103809504e-06, + "loss": 1.296, + "step": 1959 + }, + { + "epoch": 2.12, + "learning_rate": 4.337494647464785e-06, + "loss": 1.3612, + "step": 1960 + }, + { + "epoch": 2.12, + "learning_rate": 4.327664252448235e-06, + "loss": 1.4104, + "step": 1961 + }, + { + "epoch": 2.12, + "learning_rate": 4.317841932754725e-06, + "loss": 1.4945, + "step": 1962 + }, + { + "epoch": 2.12, + "learning_rate": 4.308027702367628e-06, + "loss": 1.4625, + "step": 1963 + }, + { + "epoch": 2.13, + "learning_rate": 4.298221575258814e-06, + "loss": 1.2922, + "step": 1964 + }, + { + "epoch": 2.13, + "learning_rate": 4.288423565388599e-06, + "loss": 1.5656, + "step": 1965 + }, + { + "epoch": 2.13, + "learning_rate": 4.278633686705753e-06, + "loss": 1.5695, + "step": 1966 + }, + { + "epoch": 2.13, + "learning_rate": 4.268851953147468e-06, + "loss": 1.5205, + "step": 1967 + }, + { + "epoch": 2.13, + "learning_rate": 4.259078378639338e-06, + "loss": 1.6002, + "step": 1968 + }, + { + "epoch": 2.13, + "learning_rate": 4.249312977095352e-06, + "loss": 1.5762, + "step": 1969 + }, + { + "epoch": 2.13, + "learning_rate": 4.239555762417843e-06, + "loss": 1.2551, + "step": 1970 + }, + { + "epoch": 2.13, + "learning_rate": 4.229806748497512e-06, + "loss": 1.4463, + "step": 1971 + }, + { + "epoch": 2.13, + "learning_rate": 4.2200659492133715e-06, + "loss": 1.5712, + "step": 1972 + }, + { + "epoch": 2.14, + "learning_rate": 4.2103333784327406e-06, + "loss": 1.4268, + "step": 1973 + }, + { + "epoch": 2.14, + "learning_rate": 4.200609050011229e-06, + "loss": 1.4475, + "step": 1974 + }, + { + "epoch": 2.14, + "learning_rate": 4.190892977792704e-06, + "loss": 1.4122, + "step": 1975 + }, + { + "epoch": 2.14, + "learning_rate": 4.1811851756092945e-06, + "loss": 1.6737, + "step": 1976 + }, + { + "epoch": 2.14, + "learning_rate": 4.171485657281333e-06, + "loss": 1.2418, + "step": 1977 + }, + { + "epoch": 2.14, + "learning_rate": 4.161794436617381e-06, + "loss": 1.6695, + "step": 1978 + }, + { + "epoch": 2.14, + "learning_rate": 4.152111527414177e-06, + "loss": 1.7606, + "step": 1979 + }, + { + "epoch": 2.14, + "learning_rate": 4.14243694345662e-06, + "loss": 1.2592, + "step": 1980 + }, + { + "epoch": 2.14, + "learning_rate": 4.1327706985177775e-06, + "loss": 1.2693, + "step": 1981 + }, + { + "epoch": 2.15, + "learning_rate": 4.123112806358819e-06, + "loss": 1.6064, + "step": 1982 + }, + { + "epoch": 2.15, + "learning_rate": 4.113463280729047e-06, + "loss": 1.4412, + "step": 1983 + }, + { + "epoch": 2.15, + "learning_rate": 4.103822135365837e-06, + "loss": 1.4213, + "step": 1984 + }, + { + "epoch": 2.15, + "learning_rate": 4.094189383994638e-06, + "loss": 1.8249, + "step": 1985 + }, + { + "epoch": 2.15, + "learning_rate": 4.08456504032896e-06, + "loss": 1.531, + "step": 1986 + }, + { + "epoch": 2.15, + "learning_rate": 4.074949118070319e-06, + "loss": 1.1268, + "step": 1987 + }, + { + "epoch": 2.15, + "learning_rate": 4.065341630908273e-06, + "loss": 1.2828, + "step": 1988 + }, + { + "epoch": 2.15, + "learning_rate": 4.0557425925203394e-06, + "loss": 1.4168, + "step": 1989 + }, + { + "epoch": 2.15, + "learning_rate": 4.046152016572037e-06, + "loss": 1.2655, + "step": 1990 + }, + { + "epoch": 2.15, + "learning_rate": 4.03656991671682e-06, + "loss": 1.3362, + "step": 1991 + }, + { + "epoch": 2.16, + "learning_rate": 4.026996306596073e-06, + "loss": 1.5129, + "step": 1992 + }, + { + "epoch": 2.16, + "learning_rate": 4.017431199839115e-06, + "loss": 1.2201, + "step": 1993 + }, + { + "epoch": 2.16, + "learning_rate": 4.00787461006313e-06, + "loss": 1.1742, + "step": 1994 + }, + { + "epoch": 2.16, + "learning_rate": 3.998326550873203e-06, + "loss": 1.5935, + "step": 1995 + }, + { + "epoch": 2.16, + "learning_rate": 3.9887870358622595e-06, + "loss": 1.4226, + "step": 1996 + }, + { + "epoch": 2.16, + "learning_rate": 3.979256078611068e-06, + "loss": 1.1554, + "step": 1997 + }, + { + "epoch": 2.16, + "learning_rate": 3.96973369268821e-06, + "loss": 1.4834, + "step": 1998 + }, + { + "epoch": 2.16, + "learning_rate": 3.960219891650063e-06, + "loss": 1.3716, + "step": 1999 + }, + { + "epoch": 2.16, + "learning_rate": 3.950714689040797e-06, + "loss": 1.4048, + "step": 2000 + }, + { + "epoch": 2.17, + "learning_rate": 3.941218098392313e-06, + "loss": 1.539, + "step": 2001 + }, + { + "epoch": 2.17, + "learning_rate": 3.931730133224283e-06, + "loss": 1.5156, + "step": 2002 + }, + { + "epoch": 2.17, + "learning_rate": 3.92225080704408e-06, + "loss": 1.4501, + "step": 2003 + }, + { + "epoch": 2.17, + "learning_rate": 3.912780133346783e-06, + "loss": 1.3051, + "step": 2004 + }, + { + "epoch": 2.17, + "learning_rate": 3.903318125615153e-06, + "loss": 1.4459, + "step": 2005 + }, + { + "epoch": 2.17, + "learning_rate": 3.893864797319612e-06, + "loss": 1.4032, + "step": 2006 + }, + { + "epoch": 2.17, + "learning_rate": 3.884420161918234e-06, + "loss": 1.481, + "step": 2007 + }, + { + "epoch": 2.17, + "learning_rate": 3.874984232856709e-06, + "loss": 1.3008, + "step": 2008 + }, + { + "epoch": 2.17, + "learning_rate": 3.865557023568334e-06, + "loss": 1.4493, + "step": 2009 + }, + { + "epoch": 2.18, + "learning_rate": 3.856138547473993e-06, + "loss": 1.5349, + "step": 2010 + }, + { + "epoch": 2.18, + "learning_rate": 3.846728817982137e-06, + "loss": 1.3654, + "step": 2011 + }, + { + "epoch": 2.18, + "learning_rate": 3.837327848488767e-06, + "loss": 1.4167, + "step": 2012 + }, + { + "epoch": 2.18, + "learning_rate": 3.827935652377404e-06, + "loss": 1.3823, + "step": 2013 + }, + { + "epoch": 2.18, + "learning_rate": 3.818552243019098e-06, + "loss": 1.3817, + "step": 2014 + }, + { + "epoch": 2.18, + "learning_rate": 3.8091776337723708e-06, + "loss": 1.4042, + "step": 2015 + }, + { + "epoch": 2.18, + "learning_rate": 3.7998118379832236e-06, + "loss": 1.7141, + "step": 2016 + }, + { + "epoch": 2.18, + "learning_rate": 3.790454868985113e-06, + "loss": 1.3342, + "step": 2017 + }, + { + "epoch": 2.18, + "learning_rate": 3.7811067400989233e-06, + "loss": 1.3006, + "step": 2018 + }, + { + "epoch": 2.19, + "learning_rate": 3.771767464632956e-06, + "loss": 1.562, + "step": 2019 + }, + { + "epoch": 2.19, + "learning_rate": 3.762437055882916e-06, + "loss": 1.4655, + "step": 2020 + }, + { + "epoch": 2.19, + "learning_rate": 3.7531155271318744e-06, + "loss": 1.4776, + "step": 2021 + }, + { + "epoch": 2.19, + "learning_rate": 3.7438028916502656e-06, + "loss": 1.5002, + "step": 2022 + }, + { + "epoch": 2.19, + "learning_rate": 3.734499162695864e-06, + "loss": 1.4091, + "step": 2023 + }, + { + "epoch": 2.19, + "learning_rate": 3.7252043535137606e-06, + "loss": 1.3672, + "step": 2024 + }, + { + "epoch": 2.19, + "learning_rate": 3.715918477336352e-06, + "loss": 1.5356, + "step": 2025 + }, + { + "epoch": 2.19, + "learning_rate": 3.706641547383312e-06, + "loss": 1.3502, + "step": 2026 + }, + { + "epoch": 2.19, + "learning_rate": 3.6973735768615894e-06, + "loss": 1.1353, + "step": 2027 + }, + { + "epoch": 2.19, + "learning_rate": 3.688114578965366e-06, + "loss": 1.4528, + "step": 2028 + }, + { + "epoch": 2.2, + "learning_rate": 3.678864566876055e-06, + "loss": 1.2521, + "step": 2029 + }, + { + "epoch": 2.2, + "learning_rate": 3.669623553762278e-06, + "loss": 1.4918, + "step": 2030 + }, + { + "epoch": 2.2, + "learning_rate": 3.6603915527798382e-06, + "loss": 1.5781, + "step": 2031 + }, + { + "epoch": 2.2, + "learning_rate": 3.6511685770717286e-06, + "loss": 1.5873, + "step": 2032 + }, + { + "epoch": 2.2, + "learning_rate": 3.6419546397680627e-06, + "loss": 1.395, + "step": 2033 + }, + { + "epoch": 2.2, + "learning_rate": 3.6327497539861144e-06, + "loss": 1.3087, + "step": 2034 + }, + { + "epoch": 2.2, + "learning_rate": 3.6235539328302584e-06, + "loss": 1.6652, + "step": 2035 + }, + { + "epoch": 2.2, + "learning_rate": 3.614367189391964e-06, + "loss": 1.4231, + "step": 2036 + }, + { + "epoch": 2.2, + "learning_rate": 3.605189536749788e-06, + "loss": 1.4437, + "step": 2037 + }, + { + "epoch": 2.21, + "learning_rate": 3.5960209879693263e-06, + "loss": 1.2058, + "step": 2038 + }, + { + "epoch": 2.21, + "learning_rate": 3.586861556103237e-06, + "loss": 1.5371, + "step": 2039 + }, + { + "epoch": 2.21, + "learning_rate": 3.577711254191176e-06, + "loss": 1.2755, + "step": 2040 + }, + { + "epoch": 2.21, + "learning_rate": 3.568570095259821e-06, + "loss": 1.2946, + "step": 2041 + }, + { + "epoch": 2.21, + "learning_rate": 3.5594380923228244e-06, + "loss": 1.0936, + "step": 2042 + }, + { + "epoch": 2.21, + "learning_rate": 3.5503152583807987e-06, + "loss": 1.3753, + "step": 2043 + }, + { + "epoch": 2.21, + "learning_rate": 3.5412016064213216e-06, + "loss": 1.3818, + "step": 2044 + }, + { + "epoch": 2.21, + "learning_rate": 3.5320971494188715e-06, + "loss": 1.5873, + "step": 2045 + }, + { + "epoch": 2.21, + "learning_rate": 3.5230019003348627e-06, + "loss": 1.4284, + "step": 2046 + }, + { + "epoch": 2.22, + "learning_rate": 3.513915872117586e-06, + "loss": 1.2154, + "step": 2047 + }, + { + "epoch": 2.22, + "learning_rate": 3.504839077702207e-06, + "loss": 1.2334, + "step": 2048 + }, + { + "epoch": 2.22, + "learning_rate": 3.495771530010755e-06, + "loss": 1.46, + "step": 2049 + }, + { + "epoch": 2.22, + "learning_rate": 3.486713241952078e-06, + "loss": 1.5319, + "step": 2050 + }, + { + "epoch": 2.22, + "learning_rate": 3.477664226421862e-06, + "loss": 1.1816, + "step": 2051 + }, + { + "epoch": 2.22, + "learning_rate": 3.4686244963025704e-06, + "loss": 1.4446, + "step": 2052 + }, + { + "epoch": 2.22, + "learning_rate": 3.4595940644634684e-06, + "loss": 1.2212, + "step": 2053 + }, + { + "epoch": 2.22, + "learning_rate": 3.4505729437605727e-06, + "loss": 1.4979, + "step": 2054 + }, + { + "epoch": 2.22, + "learning_rate": 3.441561147036642e-06, + "loss": 1.2802, + "step": 2055 + }, + { + "epoch": 2.23, + "learning_rate": 3.4325586871211745e-06, + "loss": 1.2958, + "step": 2056 + }, + { + "epoch": 2.23, + "learning_rate": 3.4235655768303542e-06, + "loss": 1.1839, + "step": 2057 + }, + { + "epoch": 2.23, + "learning_rate": 3.4145818289670796e-06, + "loss": 1.3612, + "step": 2058 + }, + { + "epoch": 2.23, + "learning_rate": 3.405607456320903e-06, + "loss": 1.3424, + "step": 2059 + }, + { + "epoch": 2.23, + "learning_rate": 3.396642471668037e-06, + "loss": 1.3505, + "step": 2060 + }, + { + "epoch": 2.23, + "learning_rate": 3.3876868877713253e-06, + "loss": 1.3737, + "step": 2061 + }, + { + "epoch": 2.23, + "learning_rate": 3.378740717380229e-06, + "loss": 1.2234, + "step": 2062 + }, + { + "epoch": 2.23, + "learning_rate": 3.3698039732308197e-06, + "loss": 1.416, + "step": 2063 + }, + { + "epoch": 2.23, + "learning_rate": 3.360876668045725e-06, + "loss": 1.3574, + "step": 2064 + }, + { + "epoch": 2.23, + "learning_rate": 3.3519588145341586e-06, + "loss": 1.3652, + "step": 2065 + }, + { + "epoch": 2.24, + "learning_rate": 3.343050425391866e-06, + "loss": 1.1773, + "step": 2066 + }, + { + "epoch": 2.24, + "learning_rate": 3.3341515133011236e-06, + "loss": 1.2722, + "step": 2067 + }, + { + "epoch": 2.24, + "learning_rate": 3.3252620909307123e-06, + "loss": 1.3004, + "step": 2068 + }, + { + "epoch": 2.24, + "learning_rate": 3.316382170935901e-06, + "loss": 1.3741, + "step": 2069 + }, + { + "epoch": 2.24, + "learning_rate": 3.3075117659584444e-06, + "loss": 1.4394, + "step": 2070 + }, + { + "epoch": 2.24, + "learning_rate": 3.2986508886265367e-06, + "loss": 1.459, + "step": 2071 + }, + { + "epoch": 2.24, + "learning_rate": 3.289799551554812e-06, + "loss": 1.365, + "step": 2072 + }, + { + "epoch": 2.24, + "learning_rate": 3.280957767344326e-06, + "loss": 1.2966, + "step": 2073 + }, + { + "epoch": 2.24, + "learning_rate": 3.272125548582531e-06, + "loss": 1.2573, + "step": 2074 + }, + { + "epoch": 2.25, + "learning_rate": 3.263302907843263e-06, + "loss": 1.5322, + "step": 2075 + }, + { + "epoch": 2.25, + "learning_rate": 3.2544898576867212e-06, + "loss": 1.3625, + "step": 2076 + }, + { + "epoch": 2.25, + "learning_rate": 3.2456864106594564e-06, + "loss": 1.4287, + "step": 2077 + }, + { + "epoch": 2.25, + "learning_rate": 3.236892579294343e-06, + "loss": 1.4564, + "step": 2078 + }, + { + "epoch": 2.25, + "learning_rate": 3.2281083761105657e-06, + "loss": 1.5942, + "step": 2079 + }, + { + "epoch": 2.25, + "learning_rate": 3.2193338136136055e-06, + "loss": 1.7454, + "step": 2080 + }, + { + "epoch": 2.25, + "learning_rate": 3.210568904295214e-06, + "loss": 1.5231, + "step": 2081 + }, + { + "epoch": 2.25, + "learning_rate": 3.201813660633407e-06, + "loss": 1.0115, + "step": 2082 + }, + { + "epoch": 2.25, + "learning_rate": 3.1930680950924375e-06, + "loss": 1.3776, + "step": 2083 + }, + { + "epoch": 2.26, + "learning_rate": 3.1843322201227755e-06, + "loss": 1.4914, + "step": 2084 + }, + { + "epoch": 2.26, + "learning_rate": 3.1756060481611006e-06, + "loss": 1.4918, + "step": 2085 + }, + { + "epoch": 2.26, + "learning_rate": 3.1668895916302765e-06, + "loss": 1.46, + "step": 2086 + }, + { + "epoch": 2.26, + "learning_rate": 3.158182862939334e-06, + "loss": 1.3948, + "step": 2087 + }, + { + "epoch": 2.26, + "learning_rate": 3.149485874483462e-06, + "loss": 1.1684, + "step": 2088 + }, + { + "epoch": 2.26, + "learning_rate": 3.140798638643977e-06, + "loss": 1.3323, + "step": 2089 + }, + { + "epoch": 2.26, + "learning_rate": 3.1321211677883122e-06, + "loss": 1.6293, + "step": 2090 + }, + { + "epoch": 2.26, + "learning_rate": 3.1234534742699984e-06, + "loss": 1.3293, + "step": 2091 + }, + { + "epoch": 2.26, + "learning_rate": 3.1147955704286483e-06, + "loss": 1.2917, + "step": 2092 + }, + { + "epoch": 2.27, + "learning_rate": 3.1061474685899386e-06, + "loss": 1.5275, + "step": 2093 + }, + { + "epoch": 2.27, + "learning_rate": 3.0975091810655868e-06, + "loss": 1.4996, + "step": 2094 + }, + { + "epoch": 2.27, + "learning_rate": 3.088880720153348e-06, + "loss": 1.4054, + "step": 2095 + }, + { + "epoch": 2.27, + "learning_rate": 3.080262098136979e-06, + "loss": 1.2308, + "step": 2096 + }, + { + "epoch": 2.27, + "learning_rate": 3.0716533272862327e-06, + "loss": 1.5492, + "step": 2097 + }, + { + "epoch": 2.27, + "learning_rate": 3.063054419856837e-06, + "loss": 1.3727, + "step": 2098 + }, + { + "epoch": 2.27, + "learning_rate": 3.054465388090475e-06, + "loss": 1.4997, + "step": 2099 + }, + { + "epoch": 2.27, + "learning_rate": 3.0458862442147843e-06, + "loss": 1.4322, + "step": 2100 + }, + { + "epoch": 2.27, + "learning_rate": 3.037317000443302e-06, + "loss": 1.3417, + "step": 2101 + }, + { + "epoch": 2.27, + "learning_rate": 3.0287576689754926e-06, + "loss": 1.2467, + "step": 2102 + }, + { + "epoch": 2.28, + "learning_rate": 3.0202082619966987e-06, + "loss": 1.6833, + "step": 2103 + }, + { + "epoch": 2.28, + "learning_rate": 3.0116687916781374e-06, + "loss": 1.4901, + "step": 2104 + }, + { + "epoch": 2.28, + "learning_rate": 3.003139270176877e-06, + "loss": 1.6554, + "step": 2105 + }, + { + "epoch": 2.28, + "learning_rate": 2.9946197096358197e-06, + "loss": 1.2817, + "step": 2106 + }, + { + "epoch": 2.28, + "learning_rate": 2.986110122183703e-06, + "loss": 1.4481, + "step": 2107 + }, + { + "epoch": 2.28, + "learning_rate": 2.977610519935041e-06, + "loss": 1.3326, + "step": 2108 + }, + { + "epoch": 2.28, + "learning_rate": 2.9691209149901543e-06, + "loss": 1.3675, + "step": 2109 + }, + { + "epoch": 2.28, + "learning_rate": 2.9606413194351214e-06, + "loss": 1.2567, + "step": 2110 + }, + { + "epoch": 2.28, + "learning_rate": 2.9521717453417665e-06, + "loss": 1.5993, + "step": 2111 + }, + { + "epoch": 2.29, + "learning_rate": 2.9437122047676648e-06, + "loss": 1.5903, + "step": 2112 + }, + { + "epoch": 2.29, + "learning_rate": 2.9352627097560826e-06, + "loss": 1.2208, + "step": 2113 + }, + { + "epoch": 2.29, + "learning_rate": 2.926823272336009e-06, + "loss": 1.2143, + "step": 2114 + }, + { + "epoch": 2.29, + "learning_rate": 2.9183939045220923e-06, + "loss": 1.3815, + "step": 2115 + }, + { + "epoch": 2.29, + "learning_rate": 2.909974618314665e-06, + "loss": 1.332, + "step": 2116 + }, + { + "epoch": 2.29, + "learning_rate": 2.9015654256996972e-06, + "loss": 1.5051, + "step": 2117 + }, + { + "epoch": 2.29, + "learning_rate": 2.8931663386487863e-06, + "loss": 1.1522, + "step": 2118 + }, + { + "epoch": 2.29, + "learning_rate": 2.8847773691191596e-06, + "loss": 1.3759, + "step": 2119 + }, + { + "epoch": 2.29, + "learning_rate": 2.8763985290536135e-06, + "loss": 1.4331, + "step": 2120 + }, + { + "epoch": 2.3, + "learning_rate": 2.868029830380551e-06, + "loss": 1.4589, + "step": 2121 + }, + { + "epoch": 2.3, + "learning_rate": 2.859671285013922e-06, + "loss": 1.4327, + "step": 2122 + }, + { + "epoch": 2.3, + "learning_rate": 2.851322904853222e-06, + "loss": 1.323, + "step": 2123 + }, + { + "epoch": 2.3, + "learning_rate": 2.8429847017834877e-06, + "loss": 1.4878, + "step": 2124 + }, + { + "epoch": 2.3, + "learning_rate": 2.834656687675247e-06, + "loss": 1.5697, + "step": 2125 + }, + { + "epoch": 2.3, + "learning_rate": 2.826338874384541e-06, + "loss": 1.4001, + "step": 2126 + }, + { + "epoch": 2.3, + "learning_rate": 2.8180312737528804e-06, + "loss": 1.0184, + "step": 2127 + }, + { + "epoch": 2.3, + "learning_rate": 2.8097338976072362e-06, + "loss": 1.3859, + "step": 2128 + }, + { + "epoch": 2.3, + "learning_rate": 2.801446757760026e-06, + "loss": 1.3821, + "step": 2129 + }, + { + "epoch": 2.31, + "learning_rate": 2.79316986600909e-06, + "loss": 1.5042, + "step": 2130 + }, + { + "epoch": 2.31, + "learning_rate": 2.7849032341376914e-06, + "loss": 1.4607, + "step": 2131 + }, + { + "epoch": 2.31, + "learning_rate": 2.776646873914466e-06, + "loss": 1.5038, + "step": 2132 + }, + { + "epoch": 2.31, + "learning_rate": 2.768400797093449e-06, + "loss": 1.2624, + "step": 2133 + }, + { + "epoch": 2.31, + "learning_rate": 2.76016501541402e-06, + "loss": 1.3989, + "step": 2134 + }, + { + "epoch": 2.31, + "learning_rate": 2.7519395406009107e-06, + "loss": 1.493, + "step": 2135 + }, + { + "epoch": 2.31, + "learning_rate": 2.743724384364176e-06, + "loss": 1.4691, + "step": 2136 + }, + { + "epoch": 2.31, + "learning_rate": 2.735519558399177e-06, + "loss": 1.7036, + "step": 2137 + }, + { + "epoch": 2.31, + "learning_rate": 2.727325074386579e-06, + "loss": 1.3342, + "step": 2138 + }, + { + "epoch": 2.31, + "learning_rate": 2.719140943992317e-06, + "loss": 1.4529, + "step": 2139 + }, + { + "epoch": 2.32, + "learning_rate": 2.7109671788675838e-06, + "loss": 1.3309, + "step": 2140 + }, + { + "epoch": 2.32, + "learning_rate": 2.7028037906488223e-06, + "loss": 1.227, + "step": 2141 + }, + { + "epoch": 2.32, + "learning_rate": 2.694650790957698e-06, + "loss": 1.4928, + "step": 2142 + }, + { + "epoch": 2.32, + "learning_rate": 2.6865081914010894e-06, + "loss": 1.2924, + "step": 2143 + }, + { + "epoch": 2.32, + "learning_rate": 2.6783760035710636e-06, + "loss": 1.4359, + "step": 2144 + }, + { + "epoch": 2.32, + "learning_rate": 2.6702542390448773e-06, + "loss": 1.3516, + "step": 2145 + }, + { + "epoch": 2.32, + "learning_rate": 2.662142909384937e-06, + "loss": 1.3663, + "step": 2146 + }, + { + "epoch": 2.32, + "learning_rate": 2.6540420261387966e-06, + "loss": 1.6022, + "step": 2147 + }, + { + "epoch": 2.32, + "learning_rate": 2.6459516008391394e-06, + "loss": 1.6489, + "step": 2148 + }, + { + "epoch": 2.33, + "learning_rate": 2.63787164500376e-06, + "loss": 1.3224, + "step": 2149 + }, + { + "epoch": 2.33, + "learning_rate": 2.629802170135546e-06, + "loss": 1.472, + "step": 2150 + }, + { + "epoch": 2.33, + "learning_rate": 2.621743187722472e-06, + "loss": 1.5771, + "step": 2151 + }, + { + "epoch": 2.33, + "learning_rate": 2.6136947092375655e-06, + "loss": 1.1191, + "step": 2152 + }, + { + "epoch": 2.33, + "learning_rate": 2.605656746138905e-06, + "loss": 1.3474, + "step": 2153 + }, + { + "epoch": 2.33, + "learning_rate": 2.597629309869597e-06, + "loss": 1.3218, + "step": 2154 + }, + { + "epoch": 2.33, + "learning_rate": 2.589612411857765e-06, + "loss": 1.6099, + "step": 2155 + }, + { + "epoch": 2.33, + "learning_rate": 2.5816060635165254e-06, + "loss": 1.5729, + "step": 2156 + }, + { + "epoch": 2.33, + "learning_rate": 2.5736102762439775e-06, + "loss": 1.3291, + "step": 2157 + }, + { + "epoch": 2.34, + "learning_rate": 2.5656250614231893e-06, + "loss": 1.2772, + "step": 2158 + }, + { + "epoch": 2.34, + "learning_rate": 2.5576504304221727e-06, + "loss": 1.4413, + "step": 2159 + }, + { + "epoch": 2.34, + "learning_rate": 2.549686394593874e-06, + "loss": 1.3817, + "step": 2160 + }, + { + "epoch": 2.34, + "learning_rate": 2.541732965276156e-06, + "loss": 1.3927, + "step": 2161 + }, + { + "epoch": 2.34, + "learning_rate": 2.5337901537917776e-06, + "loss": 1.3298, + "step": 2162 + }, + { + "epoch": 2.34, + "learning_rate": 2.5258579714483933e-06, + "loss": 1.4091, + "step": 2163 + }, + { + "epoch": 2.34, + "learning_rate": 2.517936429538508e-06, + "loss": 1.4948, + "step": 2164 + }, + { + "epoch": 2.34, + "learning_rate": 2.5100255393394956e-06, + "loss": 1.2598, + "step": 2165 + }, + { + "epoch": 2.34, + "learning_rate": 2.502125312113556e-06, + "loss": 1.4503, + "step": 2166 + }, + { + "epoch": 2.35, + "learning_rate": 2.4942357591077125e-06, + "loss": 1.2476, + "step": 2167 + }, + { + "epoch": 2.35, + "learning_rate": 2.4863568915537893e-06, + "loss": 1.4236, + "step": 2168 + }, + { + "epoch": 2.35, + "learning_rate": 2.4784887206684007e-06, + "loss": 1.6978, + "step": 2169 + }, + { + "epoch": 2.35, + "learning_rate": 2.470631257652939e-06, + "loss": 1.3295, + "step": 2170 + }, + { + "epoch": 2.35, + "learning_rate": 2.4627845136935336e-06, + "loss": 1.4273, + "step": 2171 + }, + { + "epoch": 2.35, + "learning_rate": 2.4549484999610773e-06, + "loss": 1.3365, + "step": 2172 + }, + { + "epoch": 2.35, + "learning_rate": 2.4471232276111723e-06, + "loss": 1.143, + "step": 2173 + }, + { + "epoch": 2.35, + "learning_rate": 2.4393087077841293e-06, + "loss": 1.5348, + "step": 2174 + }, + { + "epoch": 2.35, + "learning_rate": 2.431504951604964e-06, + "loss": 1.6333, + "step": 2175 + }, + { + "epoch": 2.35, + "learning_rate": 2.423711970183347e-06, + "loss": 1.3676, + "step": 2176 + }, + { + "epoch": 2.36, + "learning_rate": 2.415929774613631e-06, + "loss": 1.2428, + "step": 2177 + }, + { + "epoch": 2.36, + "learning_rate": 2.408158375974804e-06, + "loss": 1.3881, + "step": 2178 + }, + { + "epoch": 2.36, + "learning_rate": 2.4003977853304774e-06, + "loss": 1.3635, + "step": 2179 + }, + { + "epoch": 2.36, + "learning_rate": 2.392648013728893e-06, + "loss": 1.4889, + "step": 2180 + }, + { + "epoch": 2.36, + "learning_rate": 2.384909072202868e-06, + "loss": 1.5048, + "step": 2181 + }, + { + "epoch": 2.36, + "learning_rate": 2.3771809717698233e-06, + "loss": 1.4755, + "step": 2182 + }, + { + "epoch": 2.36, + "learning_rate": 2.3694637234317252e-06, + "loss": 1.1849, + "step": 2183 + }, + { + "epoch": 2.36, + "learning_rate": 2.3617573381751114e-06, + "loss": 1.5121, + "step": 2184 + }, + { + "epoch": 2.36, + "learning_rate": 2.3540618269710393e-06, + "loss": 1.3481, + "step": 2185 + }, + { + "epoch": 2.37, + "learning_rate": 2.3463772007750898e-06, + "loss": 1.313, + "step": 2186 + }, + { + "epoch": 2.37, + "learning_rate": 2.338703470527355e-06, + "loss": 1.3664, + "step": 2187 + }, + { + "epoch": 2.37, + "learning_rate": 2.3310406471523994e-06, + "loss": 1.2965, + "step": 2188 + }, + { + "epoch": 2.37, + "learning_rate": 2.323388741559277e-06, + "loss": 1.5522, + "step": 2189 + }, + { + "epoch": 2.37, + "learning_rate": 2.3157477646414896e-06, + "loss": 1.3192, + "step": 2190 + }, + { + "epoch": 2.37, + "learning_rate": 2.3081177272769806e-06, + "loss": 1.2854, + "step": 2191 + }, + { + "epoch": 2.37, + "learning_rate": 2.3004986403281215e-06, + "loss": 1.4903, + "step": 2192 + }, + { + "epoch": 2.37, + "learning_rate": 2.2928905146416925e-06, + "loss": 1.422, + "step": 2193 + }, + { + "epoch": 2.37, + "learning_rate": 2.285293361048878e-06, + "loss": 1.3284, + "step": 2194 + }, + { + "epoch": 2.38, + "learning_rate": 2.2777071903652248e-06, + "loss": 1.5621, + "step": 2195 + }, + { + "epoch": 2.38, + "learning_rate": 2.2701320133906614e-06, + "loss": 1.5547, + "step": 2196 + }, + { + "epoch": 2.38, + "learning_rate": 2.2625678409094554e-06, + "loss": 1.424, + "step": 2197 + }, + { + "epoch": 2.38, + "learning_rate": 2.2550146836902107e-06, + "loss": 1.5292, + "step": 2198 + }, + { + "epoch": 2.38, + "learning_rate": 2.247472552485849e-06, + "loss": 1.5616, + "step": 2199 + }, + { + "epoch": 2.38, + "learning_rate": 2.239941458033593e-06, + "loss": 1.3659, + "step": 2200 + }, + { + "epoch": 2.38, + "learning_rate": 2.2324214110549613e-06, + "loss": 1.6546, + "step": 2201 + }, + { + "epoch": 2.38, + "learning_rate": 2.2249124222557362e-06, + "loss": 1.4161, + "step": 2202 + }, + { + "epoch": 2.38, + "learning_rate": 2.2174145023259595e-06, + "loss": 1.4106, + "step": 2203 + }, + { + "epoch": 2.39, + "learning_rate": 2.209927661939918e-06, + "loss": 1.258, + "step": 2204 + }, + { + "epoch": 2.39, + "learning_rate": 2.2024519117561203e-06, + "loss": 1.5043, + "step": 2205 + }, + { + "epoch": 2.39, + "learning_rate": 2.1949872624172907e-06, + "loss": 1.388, + "step": 2206 + }, + { + "epoch": 2.39, + "learning_rate": 2.187533724550346e-06, + "loss": 1.4317, + "step": 2207 + }, + { + "epoch": 2.39, + "learning_rate": 2.180091308766391e-06, + "loss": 1.29, + "step": 2208 + }, + { + "epoch": 2.39, + "learning_rate": 2.172660025660692e-06, + "loss": 1.3285, + "step": 2209 + }, + { + "epoch": 2.39, + "learning_rate": 2.1652398858126656e-06, + "loss": 1.6029, + "step": 2210 + }, + { + "epoch": 2.39, + "learning_rate": 2.1578308997858664e-06, + "loss": 1.3068, + "step": 2211 + }, + { + "epoch": 2.39, + "learning_rate": 2.1504330781279703e-06, + "loss": 1.6577, + "step": 2212 + }, + { + "epoch": 2.4, + "learning_rate": 2.1430464313707557e-06, + "loss": 1.4977, + "step": 2213 + }, + { + "epoch": 2.4, + "learning_rate": 2.1356709700300994e-06, + "loss": 1.2976, + "step": 2214 + }, + { + "epoch": 2.4, + "learning_rate": 2.128306704605949e-06, + "loss": 1.2786, + "step": 2215 + }, + { + "epoch": 2.4, + "learning_rate": 2.1209536455823123e-06, + "loss": 1.3484, + "step": 2216 + }, + { + "epoch": 2.4, + "learning_rate": 2.1136118034272444e-06, + "loss": 1.1646, + "step": 2217 + }, + { + "epoch": 2.4, + "learning_rate": 2.106281188592836e-06, + "loss": 1.5719, + "step": 2218 + }, + { + "epoch": 2.4, + "learning_rate": 2.098961811515187e-06, + "loss": 1.5823, + "step": 2219 + }, + { + "epoch": 2.4, + "learning_rate": 2.0916536826144006e-06, + "loss": 1.3301, + "step": 2220 + }, + { + "epoch": 2.4, + "learning_rate": 2.0843568122945747e-06, + "loss": 1.5403, + "step": 2221 + }, + { + "epoch": 2.4, + "learning_rate": 2.077071210943771e-06, + "loss": 1.4609, + "step": 2222 + }, + { + "epoch": 2.41, + "learning_rate": 2.069796888934008e-06, + "loss": 1.1364, + "step": 2223 + }, + { + "epoch": 2.41, + "learning_rate": 2.0625338566212505e-06, + "loss": 1.2703, + "step": 2224 + }, + { + "epoch": 2.41, + "learning_rate": 2.055282124345387e-06, + "loss": 1.7914, + "step": 2225 + }, + { + "epoch": 2.41, + "learning_rate": 2.048041702430228e-06, + "loss": 1.0629, + "step": 2226 + }, + { + "epoch": 2.41, + "learning_rate": 2.040812601183465e-06, + "loss": 1.3095, + "step": 2227 + }, + { + "epoch": 2.41, + "learning_rate": 2.033594830896691e-06, + "loss": 1.2865, + "step": 2228 + }, + { + "epoch": 2.41, + "learning_rate": 2.0263884018453596e-06, + "loss": 1.6661, + "step": 2229 + }, + { + "epoch": 2.41, + "learning_rate": 2.019193324288775e-06, + "loss": 1.4554, + "step": 2230 + }, + { + "epoch": 2.41, + "learning_rate": 2.0120096084700936e-06, + "loss": 1.6451, + "step": 2231 + }, + { + "epoch": 2.42, + "learning_rate": 2.0048372646162774e-06, + "loss": 1.3967, + "step": 2232 + }, + { + "epoch": 2.42, + "learning_rate": 1.9976763029381197e-06, + "loss": 1.4399, + "step": 2233 + }, + { + "epoch": 2.42, + "learning_rate": 1.990526733630196e-06, + "loss": 1.5767, + "step": 2234 + }, + { + "epoch": 2.42, + "learning_rate": 1.9833885668708686e-06, + "loss": 1.1332, + "step": 2235 + }, + { + "epoch": 2.42, + "learning_rate": 1.9762618128222653e-06, + "loss": 1.1133, + "step": 2236 + }, + { + "epoch": 2.42, + "learning_rate": 1.969146481630263e-06, + "loss": 1.4982, + "step": 2237 + }, + { + "epoch": 2.42, + "learning_rate": 1.9620425834244893e-06, + "loss": 1.6646, + "step": 2238 + }, + { + "epoch": 2.42, + "learning_rate": 1.954950128318276e-06, + "loss": 1.4872, + "step": 2239 + }, + { + "epoch": 2.42, + "learning_rate": 1.9478691264086824e-06, + "loss": 1.5388, + "step": 2240 + }, + { + "epoch": 2.43, + "learning_rate": 1.940799587776452e-06, + "loss": 1.3563, + "step": 2241 + }, + { + "epoch": 2.43, + "learning_rate": 1.933741522486009e-06, + "loss": 1.2739, + "step": 2242 + }, + { + "epoch": 2.43, + "learning_rate": 1.926694940585454e-06, + "loss": 1.2479, + "step": 2243 + }, + { + "epoch": 2.43, + "learning_rate": 1.9196598521065212e-06, + "loss": 1.4168, + "step": 2244 + }, + { + "epoch": 2.43, + "learning_rate": 1.912636267064605e-06, + "loss": 1.1941, + "step": 2245 + }, + { + "epoch": 2.43, + "learning_rate": 1.9056241954586997e-06, + "loss": 1.5037, + "step": 2246 + }, + { + "epoch": 2.43, + "learning_rate": 1.8986236472714281e-06, + "loss": 1.5659, + "step": 2247 + }, + { + "epoch": 2.43, + "learning_rate": 1.8916346324689982e-06, + "loss": 1.2473, + "step": 2248 + }, + { + "epoch": 2.43, + "learning_rate": 1.8846571610011965e-06, + "loss": 1.3946, + "step": 2249 + }, + { + "epoch": 2.44, + "learning_rate": 1.8776912428013883e-06, + "loss": 1.5602, + "step": 2250 + }, + { + "epoch": 2.44, + "learning_rate": 1.8707368877864729e-06, + "loss": 1.3332, + "step": 2251 + }, + { + "epoch": 2.44, + "learning_rate": 1.8637941058569065e-06, + "loss": 1.6887, + "step": 2252 + }, + { + "epoch": 2.44, + "learning_rate": 1.856862906896657e-06, + "loss": 1.4142, + "step": 2253 + }, + { + "epoch": 2.44, + "learning_rate": 1.8499433007732037e-06, + "loss": 1.2743, + "step": 2254 + }, + { + "epoch": 2.44, + "learning_rate": 1.8430352973375344e-06, + "loss": 1.2524, + "step": 2255 + }, + { + "epoch": 2.44, + "learning_rate": 1.8361389064240965e-06, + "loss": 1.4024, + "step": 2256 + }, + { + "epoch": 2.44, + "learning_rate": 1.829254137850829e-06, + "loss": 1.3586, + "step": 2257 + }, + { + "epoch": 2.44, + "learning_rate": 1.8223810014191046e-06, + "loss": 1.6931, + "step": 2258 + }, + { + "epoch": 2.44, + "learning_rate": 1.815519506913752e-06, + "loss": 1.3345, + "step": 2259 + }, + { + "epoch": 2.45, + "learning_rate": 1.808669664103019e-06, + "loss": 1.5224, + "step": 2260 + }, + { + "epoch": 2.45, + "learning_rate": 1.8018314827385618e-06, + "loss": 1.4597, + "step": 2261 + }, + { + "epoch": 2.45, + "learning_rate": 1.7950049725554476e-06, + "loss": 1.4612, + "step": 2262 + }, + { + "epoch": 2.45, + "learning_rate": 1.7881901432721104e-06, + "loss": 1.3099, + "step": 2263 + }, + { + "epoch": 2.45, + "learning_rate": 1.7813870045903715e-06, + "loss": 1.4219, + "step": 2264 + }, + { + "epoch": 2.45, + "learning_rate": 1.7745955661953996e-06, + "loss": 1.3978, + "step": 2265 + }, + { + "epoch": 2.45, + "learning_rate": 1.7678158377557097e-06, + "loss": 1.4722, + "step": 2266 + }, + { + "epoch": 2.45, + "learning_rate": 1.7610478289231435e-06, + "loss": 1.4937, + "step": 2267 + }, + { + "epoch": 2.45, + "learning_rate": 1.7542915493328594e-06, + "loss": 1.4675, + "step": 2268 + }, + { + "epoch": 2.46, + "learning_rate": 1.7475470086033264e-06, + "loss": 1.5819, + "step": 2269 + }, + { + "epoch": 2.46, + "learning_rate": 1.7408142163362818e-06, + "loss": 1.4146, + "step": 2270 + }, + { + "epoch": 2.46, + "learning_rate": 1.7340931821167572e-06, + "loss": 1.3415, + "step": 2271 + }, + { + "epoch": 2.46, + "learning_rate": 1.7273839155130355e-06, + "loss": 1.5336, + "step": 2272 + }, + { + "epoch": 2.46, + "learning_rate": 1.7206864260766486e-06, + "loss": 1.4796, + "step": 2273 + }, + { + "epoch": 2.46, + "learning_rate": 1.7140007233423606e-06, + "loss": 1.5048, + "step": 2274 + }, + { + "epoch": 2.46, + "learning_rate": 1.7073268168281564e-06, + "loss": 1.5898, + "step": 2275 + }, + { + "epoch": 2.46, + "learning_rate": 1.7006647160352308e-06, + "loss": 1.4885, + "step": 2276 + }, + { + "epoch": 2.46, + "learning_rate": 1.6940144304479678e-06, + "loss": 1.2994, + "step": 2277 + }, + { + "epoch": 2.47, + "learning_rate": 1.687375969533932e-06, + "loss": 1.4332, + "step": 2278 + }, + { + "epoch": 2.47, + "learning_rate": 1.6807493427438526e-06, + "loss": 1.3831, + "step": 2279 + }, + { + "epoch": 2.47, + "learning_rate": 1.6741345595116133e-06, + "loss": 1.4273, + "step": 2280 + }, + { + "epoch": 2.47, + "learning_rate": 1.6675316292542344e-06, + "loss": 1.0097, + "step": 2281 + }, + { + "epoch": 2.47, + "learning_rate": 1.6609405613718688e-06, + "loss": 1.3448, + "step": 2282 + }, + { + "epoch": 2.47, + "learning_rate": 1.654361365247773e-06, + "loss": 1.5362, + "step": 2283 + }, + { + "epoch": 2.47, + "learning_rate": 1.647794050248307e-06, + "loss": 1.3242, + "step": 2284 + }, + { + "epoch": 2.47, + "learning_rate": 1.6412386257229152e-06, + "loss": 1.3754, + "step": 2285 + }, + { + "epoch": 2.47, + "learning_rate": 1.6346951010041146e-06, + "loss": 1.7743, + "step": 2286 + }, + { + "epoch": 2.48, + "learning_rate": 1.6281634854074823e-06, + "loss": 1.568, + "step": 2287 + }, + { + "epoch": 2.48, + "learning_rate": 1.6216437882316382e-06, + "loss": 1.2589, + "step": 2288 + }, + { + "epoch": 2.48, + "learning_rate": 1.61513601875824e-06, + "loss": 1.5652, + "step": 2289 + }, + { + "epoch": 2.48, + "learning_rate": 1.6086401862519608e-06, + "loss": 1.793, + "step": 2290 + }, + { + "epoch": 2.48, + "learning_rate": 1.6021562999604789e-06, + "loss": 1.4683, + "step": 2291 + }, + { + "epoch": 2.48, + "learning_rate": 1.5956843691144686e-06, + "loss": 1.6087, + "step": 2292 + }, + { + "epoch": 2.48, + "learning_rate": 1.5892244029275805e-06, + "loss": 1.3121, + "step": 2293 + }, + { + "epoch": 2.48, + "learning_rate": 1.5827764105964405e-06, + "loss": 1.2332, + "step": 2294 + }, + { + "epoch": 2.48, + "learning_rate": 1.5763404013006124e-06, + "loss": 1.3393, + "step": 2295 + }, + { + "epoch": 2.48, + "learning_rate": 1.5699163842026166e-06, + "loss": 1.4695, + "step": 2296 + }, + { + "epoch": 2.49, + "learning_rate": 1.5635043684478933e-06, + "loss": 1.4767, + "step": 2297 + }, + { + "epoch": 2.49, + "learning_rate": 1.5571043631647976e-06, + "loss": 1.3673, + "step": 2298 + }, + { + "epoch": 2.49, + "learning_rate": 1.5507163774645862e-06, + "loss": 1.454, + "step": 2299 + }, + { + "epoch": 2.49, + "learning_rate": 1.5443404204414025e-06, + "loss": 1.3161, + "step": 2300 + }, + { + "epoch": 2.49, + "learning_rate": 1.5379765011722758e-06, + "loss": 1.6034, + "step": 2301 + }, + { + "epoch": 2.49, + "learning_rate": 1.531624628717081e-06, + "loss": 1.4068, + "step": 2302 + }, + { + "epoch": 2.49, + "learning_rate": 1.525284812118557e-06, + "loss": 1.3525, + "step": 2303 + }, + { + "epoch": 2.49, + "learning_rate": 1.518957060402274e-06, + "loss": 1.4532, + "step": 2304 + }, + { + "epoch": 2.49, + "learning_rate": 1.512641382576624e-06, + "loss": 1.4955, + "step": 2305 + }, + { + "epoch": 2.5, + "learning_rate": 1.506337787632819e-06, + "loss": 1.3729, + "step": 2306 + }, + { + "epoch": 2.5, + "learning_rate": 1.5000462845448537e-06, + "loss": 1.7372, + "step": 2307 + }, + { + "epoch": 2.5, + "learning_rate": 1.4937668822695294e-06, + "loss": 1.6803, + "step": 2308 + }, + { + "epoch": 2.5, + "learning_rate": 1.4874995897463972e-06, + "loss": 1.2643, + "step": 2309 + }, + { + "epoch": 2.5, + "learning_rate": 1.481244415897789e-06, + "loss": 1.6122, + "step": 2310 + }, + { + "epoch": 2.5, + "learning_rate": 1.4750013696287723e-06, + "loss": 1.4969, + "step": 2311 + }, + { + "epoch": 2.5, + "learning_rate": 1.4687704598271502e-06, + "loss": 1.4946, + "step": 2312 + }, + { + "epoch": 2.5, + "learning_rate": 1.4625516953634567e-06, + "loss": 1.4947, + "step": 2313 + }, + { + "epoch": 2.5, + "learning_rate": 1.456345085090919e-06, + "loss": 1.5622, + "step": 2314 + }, + { + "epoch": 2.51, + "learning_rate": 1.4501506378454787e-06, + "loss": 1.0848, + "step": 2315 + }, + { + "epoch": 2.51, + "learning_rate": 1.44396836244575e-06, + "loss": 1.3604, + "step": 2316 + }, + { + "epoch": 2.51, + "learning_rate": 1.4377982676930235e-06, + "loss": 1.521, + "step": 2317 + }, + { + "epoch": 2.51, + "learning_rate": 1.4316403623712516e-06, + "loss": 1.5201, + "step": 2318 + }, + { + "epoch": 2.51, + "learning_rate": 1.425494655247024e-06, + "loss": 1.3448, + "step": 2319 + }, + { + "epoch": 2.51, + "learning_rate": 1.4193611550695773e-06, + "loss": 1.4419, + "step": 2320 + }, + { + "epoch": 2.51, + "learning_rate": 1.413239870570756e-06, + "loss": 1.6954, + "step": 2321 + }, + { + "epoch": 2.51, + "learning_rate": 1.4071308104650272e-06, + "loss": 1.5843, + "step": 2322 + }, + { + "epoch": 2.51, + "learning_rate": 1.4010339834494468e-06, + "loss": 1.7759, + "step": 2323 + }, + { + "epoch": 2.52, + "learning_rate": 1.3949493982036555e-06, + "loss": 1.3358, + "step": 2324 + }, + { + "epoch": 2.52, + "learning_rate": 1.3888770633898762e-06, + "loss": 1.3894, + "step": 2325 + }, + { + "epoch": 2.52, + "learning_rate": 1.382816987652873e-06, + "loss": 1.4051, + "step": 2326 + }, + { + "epoch": 2.52, + "learning_rate": 1.3767691796199745e-06, + "loss": 1.7752, + "step": 2327 + }, + { + "epoch": 2.52, + "learning_rate": 1.3707336479010381e-06, + "loss": 1.4798, + "step": 2328 + }, + { + "epoch": 2.52, + "learning_rate": 1.3647104010884438e-06, + "loss": 1.6275, + "step": 2329 + }, + { + "epoch": 2.52, + "learning_rate": 1.3586994477570825e-06, + "loss": 1.4139, + "step": 2330 + }, + { + "epoch": 2.52, + "learning_rate": 1.3527007964643436e-06, + "loss": 1.5555, + "step": 2331 + }, + { + "epoch": 2.52, + "learning_rate": 1.3467144557501065e-06, + "loss": 1.7837, + "step": 2332 + }, + { + "epoch": 2.52, + "learning_rate": 1.3407404341367203e-06, + "loss": 1.4145, + "step": 2333 + }, + { + "epoch": 2.53, + "learning_rate": 1.334778740128998e-06, + "loss": 1.4779, + "step": 2334 + }, + { + "epoch": 2.53, + "learning_rate": 1.3288293822142017e-06, + "loss": 1.4364, + "step": 2335 + }, + { + "epoch": 2.53, + "learning_rate": 1.3228923688620342e-06, + "loss": 1.4961, + "step": 2336 + }, + { + "epoch": 2.53, + "learning_rate": 1.3169677085246213e-06, + "loss": 1.514, + "step": 2337 + }, + { + "epoch": 2.53, + "learning_rate": 1.3110554096365003e-06, + "loss": 1.5074, + "step": 2338 + }, + { + "epoch": 2.53, + "learning_rate": 1.3051554806146195e-06, + "loss": 1.4463, + "step": 2339 + }, + { + "epoch": 2.53, + "learning_rate": 1.2992679298583089e-06, + "loss": 1.6143, + "step": 2340 + }, + { + "epoch": 2.53, + "learning_rate": 1.2933927657492774e-06, + "loss": 1.3421, + "step": 2341 + }, + { + "epoch": 2.53, + "learning_rate": 1.287529996651602e-06, + "loss": 1.5018, + "step": 2342 + }, + { + "epoch": 2.54, + "learning_rate": 1.281679630911714e-06, + "loss": 1.2337, + "step": 2343 + }, + { + "epoch": 2.54, + "learning_rate": 1.2758416768583814e-06, + "loss": 1.3933, + "step": 2344 + }, + { + "epoch": 2.54, + "learning_rate": 1.2700161428027124e-06, + "loss": 1.1947, + "step": 2345 + }, + { + "epoch": 2.54, + "learning_rate": 1.2642030370381264e-06, + "loss": 1.4681, + "step": 2346 + }, + { + "epoch": 2.54, + "learning_rate": 1.2584023678403502e-06, + "loss": 1.7516, + "step": 2347 + }, + { + "epoch": 2.54, + "learning_rate": 1.2526141434674076e-06, + "loss": 1.3862, + "step": 2348 + }, + { + "epoch": 2.54, + "learning_rate": 1.2468383721596044e-06, + "loss": 1.7484, + "step": 2349 + }, + { + "epoch": 2.54, + "learning_rate": 1.2410750621395163e-06, + "loss": 1.4472, + "step": 2350 + }, + { + "epoch": 2.54, + "learning_rate": 1.2353242216119798e-06, + "loss": 1.3695, + "step": 2351 + }, + { + "epoch": 2.55, + "learning_rate": 1.229585858764084e-06, + "loss": 1.4722, + "step": 2352 + }, + { + "epoch": 2.55, + "learning_rate": 1.2238599817651486e-06, + "loss": 1.3181, + "step": 2353 + }, + { + "epoch": 2.55, + "learning_rate": 1.2181465987667174e-06, + "loss": 1.4107, + "step": 2354 + }, + { + "epoch": 2.55, + "learning_rate": 1.2124457179025527e-06, + "loss": 1.2216, + "step": 2355 + }, + { + "epoch": 2.55, + "learning_rate": 1.2067573472886108e-06, + "loss": 1.526, + "step": 2356 + }, + { + "epoch": 2.55, + "learning_rate": 1.2010814950230498e-06, + "loss": 1.392, + "step": 2357 + }, + { + "epoch": 2.55, + "learning_rate": 1.195418169186191e-06, + "loss": 1.3603, + "step": 2358 + }, + { + "epoch": 2.55, + "learning_rate": 1.1897673778405372e-06, + "loss": 1.3015, + "step": 2359 + }, + { + "epoch": 2.55, + "learning_rate": 1.1841291290307356e-06, + "loss": 1.355, + "step": 2360 + }, + { + "epoch": 2.56, + "learning_rate": 1.1785034307835853e-06, + "loss": 1.5695, + "step": 2361 + }, + { + "epoch": 2.56, + "learning_rate": 1.172890291108012e-06, + "loss": 1.3653, + "step": 2362 + }, + { + "epoch": 2.56, + "learning_rate": 1.1672897179950648e-06, + "loss": 1.6426, + "step": 2363 + }, + { + "epoch": 2.56, + "learning_rate": 1.161701719417908e-06, + "loss": 1.3159, + "step": 2364 + }, + { + "epoch": 2.56, + "learning_rate": 1.1561263033317926e-06, + "loss": 1.5476, + "step": 2365 + }, + { + "epoch": 2.56, + "learning_rate": 1.1505634776740693e-06, + "loss": 1.3, + "step": 2366 + }, + { + "epoch": 2.56, + "learning_rate": 1.1450132503641564e-06, + "loss": 1.3006, + "step": 2367 + }, + { + "epoch": 2.56, + "learning_rate": 1.1394756293035369e-06, + "loss": 1.3904, + "step": 2368 + }, + { + "epoch": 2.56, + "learning_rate": 1.1339506223757579e-06, + "loss": 1.2846, + "step": 2369 + }, + { + "epoch": 2.56, + "learning_rate": 1.128438237446391e-06, + "loss": 1.5087, + "step": 2370 + }, + { + "epoch": 2.57, + "learning_rate": 1.1229384823630552e-06, + "loss": 1.4214, + "step": 2371 + }, + { + "epoch": 2.57, + "learning_rate": 1.1174513649553742e-06, + "loss": 1.2962, + "step": 2372 + }, + { + "epoch": 2.57, + "learning_rate": 1.111976893034996e-06, + "loss": 1.2474, + "step": 2373 + }, + { + "epoch": 2.57, + "learning_rate": 1.1065150743955544e-06, + "loss": 1.4587, + "step": 2374 + }, + { + "epoch": 2.57, + "learning_rate": 1.1010659168126713e-06, + "loss": 1.4724, + "step": 2375 + }, + { + "epoch": 2.57, + "learning_rate": 1.0956294280439527e-06, + "loss": 1.3681, + "step": 2376 + }, + { + "epoch": 2.57, + "learning_rate": 1.0902056158289542e-06, + "loss": 1.5189, + "step": 2377 + }, + { + "epoch": 2.57, + "learning_rate": 1.084794487889199e-06, + "loss": 1.7258, + "step": 2378 + }, + { + "epoch": 2.57, + "learning_rate": 1.0793960519281433e-06, + "loss": 1.4144, + "step": 2379 + }, + { + "epoch": 2.58, + "learning_rate": 1.0740103156311753e-06, + "loss": 1.2554, + "step": 2380 + }, + { + "epoch": 2.58, + "learning_rate": 1.0686372866656124e-06, + "loss": 1.665, + "step": 2381 + }, + { + "epoch": 2.58, + "learning_rate": 1.063276972680667e-06, + "loss": 1.2705, + "step": 2382 + }, + { + "epoch": 2.58, + "learning_rate": 1.057929381307462e-06, + "loss": 1.382, + "step": 2383 + }, + { + "epoch": 2.58, + "learning_rate": 1.0525945201590037e-06, + "loss": 1.3876, + "step": 2384 + }, + { + "epoch": 2.58, + "learning_rate": 1.0472723968301712e-06, + "loss": 1.1645, + "step": 2385 + }, + { + "epoch": 2.58, + "learning_rate": 1.0419630188977213e-06, + "loss": 1.5499, + "step": 2386 + }, + { + "epoch": 2.58, + "learning_rate": 1.0366663939202515e-06, + "loss": 1.3362, + "step": 2387 + }, + { + "epoch": 2.58, + "learning_rate": 1.031382529438215e-06, + "loss": 1.5812, + "step": 2388 + }, + { + "epoch": 2.59, + "learning_rate": 1.0261114329738897e-06, + "loss": 1.2592, + "step": 2389 + }, + { + "epoch": 2.59, + "learning_rate": 1.0208531120313857e-06, + "loss": 1.5211, + "step": 2390 + }, + { + "epoch": 2.59, + "learning_rate": 1.0156075740966198e-06, + "loss": 1.2363, + "step": 2391 + }, + { + "epoch": 2.59, + "learning_rate": 1.010374826637308e-06, + "loss": 1.6139, + "step": 2392 + }, + { + "epoch": 2.59, + "learning_rate": 1.0051548771029696e-06, + "loss": 1.3463, + "step": 2393 + }, + { + "epoch": 2.59, + "learning_rate": 9.999477329248864e-07, + "loss": 1.3339, + "step": 2394 + }, + { + "epoch": 2.59, + "learning_rate": 9.947534015161254e-07, + "loss": 1.374, + "step": 2395 + }, + { + "epoch": 2.59, + "learning_rate": 9.895718902715057e-07, + "loss": 1.0881, + "step": 2396 + }, + { + "epoch": 2.59, + "learning_rate": 9.844032065675945e-07, + "loss": 1.0885, + "step": 2397 + }, + { + "epoch": 2.6, + "learning_rate": 9.792473577627026e-07, + "loss": 1.2909, + "step": 2398 + }, + { + "epoch": 2.6, + "learning_rate": 9.741043511968605e-07, + "loss": 1.6412, + "step": 2399 + }, + { + "epoch": 2.6, + "learning_rate": 9.689741941918273e-07, + "loss": 1.3653, + "step": 2400 + }, + { + "epoch": 2.6, + "learning_rate": 9.638568940510563e-07, + "loss": 1.2859, + "step": 2401 + }, + { + "epoch": 2.6, + "learning_rate": 9.587524580597086e-07, + "loss": 1.4281, + "step": 2402 + }, + { + "epoch": 2.6, + "learning_rate": 9.536608934846236e-07, + "loss": 1.2901, + "step": 2403 + }, + { + "epoch": 2.6, + "learning_rate": 9.4858220757432e-07, + "loss": 1.6859, + "step": 2404 + }, + { + "epoch": 2.6, + "learning_rate": 9.435164075589832e-07, + "loss": 1.6743, + "step": 2405 + }, + { + "epoch": 2.6, + "learning_rate": 9.384635006504483e-07, + "loss": 1.2505, + "step": 2406 + }, + { + "epoch": 2.6, + "learning_rate": 9.334234940422027e-07, + "loss": 1.3083, + "step": 2407 + }, + { + "epoch": 2.61, + "learning_rate": 9.283963949093655e-07, + "loss": 1.3136, + "step": 2408 + }, + { + "epoch": 2.61, + "learning_rate": 9.233822104086765e-07, + "loss": 1.4036, + "step": 2409 + }, + { + "epoch": 2.61, + "learning_rate": 9.183809476784955e-07, + "loss": 1.5002, + "step": 2410 + }, + { + "epoch": 2.61, + "learning_rate": 9.133926138387827e-07, + "loss": 1.5041, + "step": 2411 + }, + { + "epoch": 2.61, + "learning_rate": 9.084172159910942e-07, + "loss": 1.3798, + "step": 2412 + }, + { + "epoch": 2.61, + "learning_rate": 9.034547612185673e-07, + "loss": 1.3899, + "step": 2413 + }, + { + "epoch": 2.61, + "learning_rate": 8.985052565859198e-07, + "loss": 1.549, + "step": 2414 + }, + { + "epoch": 2.61, + "learning_rate": 8.935687091394251e-07, + "loss": 1.3845, + "step": 2415 + }, + { + "epoch": 2.61, + "learning_rate": 8.886451259069151e-07, + "loss": 1.5776, + "step": 2416 + }, + { + "epoch": 2.62, + "learning_rate": 8.837345138977638e-07, + "loss": 1.6024, + "step": 2417 + }, + { + "epoch": 2.62, + "learning_rate": 8.788368801028801e-07, + "loss": 1.3396, + "step": 2418 + }, + { + "epoch": 2.62, + "learning_rate": 8.739522314946936e-07, + "loss": 1.3144, + "step": 2419 + }, + { + "epoch": 2.62, + "learning_rate": 8.690805750271536e-07, + "loss": 1.3015, + "step": 2420 + }, + { + "epoch": 2.62, + "learning_rate": 8.642219176357081e-07, + "loss": 1.369, + "step": 2421 + }, + { + "epoch": 2.62, + "learning_rate": 8.593762662373018e-07, + "loss": 1.195, + "step": 2422 + }, + { + "epoch": 2.62, + "learning_rate": 8.545436277303609e-07, + "loss": 1.522, + "step": 2423 + }, + { + "epoch": 2.62, + "learning_rate": 8.497240089947901e-07, + "loss": 1.5346, + "step": 2424 + }, + { + "epoch": 2.62, + "learning_rate": 8.449174168919549e-07, + "loss": 1.2527, + "step": 2425 + }, + { + "epoch": 2.63, + "learning_rate": 8.401238582646775e-07, + "loss": 1.21, + "step": 2426 + }, + { + "epoch": 2.63, + "learning_rate": 8.353433399372257e-07, + "loss": 1.5814, + "step": 2427 + }, + { + "epoch": 2.63, + "learning_rate": 8.305758687153032e-07, + "loss": 1.2182, + "step": 2428 + }, + { + "epoch": 2.63, + "learning_rate": 8.258214513860363e-07, + "loss": 1.2452, + "step": 2429 + }, + { + "epoch": 2.63, + "learning_rate": 8.210800947179698e-07, + "loss": 1.3678, + "step": 2430 + }, + { + "epoch": 2.63, + "learning_rate": 8.163518054610531e-07, + "loss": 1.4426, + "step": 2431 + }, + { + "epoch": 2.63, + "learning_rate": 8.116365903466394e-07, + "loss": 1.469, + "step": 2432 + }, + { + "epoch": 2.63, + "learning_rate": 8.069344560874548e-07, + "loss": 1.431, + "step": 2433 + }, + { + "epoch": 2.63, + "learning_rate": 8.022454093776178e-07, + "loss": 1.5834, + "step": 2434 + }, + { + "epoch": 2.64, + "learning_rate": 7.975694568926085e-07, + "loss": 1.5362, + "step": 2435 + }, + { + "epoch": 2.64, + "learning_rate": 7.92906605289262e-07, + "loss": 1.5635, + "step": 2436 + }, + { + "epoch": 2.64, + "learning_rate": 7.882568612057728e-07, + "loss": 1.1414, + "step": 2437 + }, + { + "epoch": 2.64, + "learning_rate": 7.836202312616626e-07, + "loss": 1.8295, + "step": 2438 + }, + { + "epoch": 2.64, + "learning_rate": 7.789967220577965e-07, + "loss": 1.2892, + "step": 2439 + }, + { + "epoch": 2.64, + "learning_rate": 7.743863401763463e-07, + "loss": 1.3894, + "step": 2440 + }, + { + "epoch": 2.64, + "learning_rate": 7.697890921808082e-07, + "loss": 1.4946, + "step": 2441 + }, + { + "epoch": 2.64, + "learning_rate": 7.652049846159726e-07, + "loss": 1.5447, + "step": 2442 + }, + { + "epoch": 2.64, + "learning_rate": 7.606340240079257e-07, + "loss": 1.265, + "step": 2443 + }, + { + "epoch": 2.65, + "learning_rate": 7.56076216864039e-07, + "loss": 1.3459, + "step": 2444 + }, + { + "epoch": 2.65, + "learning_rate": 7.515315696729519e-07, + "loss": 1.4567, + "step": 2445 + }, + { + "epoch": 2.65, + "learning_rate": 7.470000889045758e-07, + "loss": 1.2605, + "step": 2446 + }, + { + "epoch": 2.65, + "learning_rate": 7.424817810100749e-07, + "loss": 1.4518, + "step": 2447 + }, + { + "epoch": 2.65, + "learning_rate": 7.37976652421859e-07, + "loss": 1.2771, + "step": 2448 + }, + { + "epoch": 2.65, + "learning_rate": 7.334847095535813e-07, + "loss": 1.2964, + "step": 2449 + }, + { + "epoch": 2.65, + "learning_rate": 7.290059588001119e-07, + "loss": 1.8313, + "step": 2450 + }, + { + "epoch": 2.65, + "learning_rate": 7.24540406537555e-07, + "loss": 1.4013, + "step": 2451 + }, + { + "epoch": 2.65, + "learning_rate": 7.200880591232084e-07, + "loss": 1.4062, + "step": 2452 + }, + { + "epoch": 2.65, + "learning_rate": 7.156489228955866e-07, + "loss": 1.569, + "step": 2453 + }, + { + "epoch": 2.66, + "learning_rate": 7.112230041743862e-07, + "loss": 1.6697, + "step": 2454 + }, + { + "epoch": 2.66, + "learning_rate": 7.068103092604894e-07, + "loss": 1.5741, + "step": 2455 + }, + { + "epoch": 2.66, + "learning_rate": 7.02410844435959e-07, + "loss": 1.3958, + "step": 2456 + }, + { + "epoch": 2.66, + "learning_rate": 6.980246159640092e-07, + "loss": 1.4013, + "step": 2457 + }, + { + "epoch": 2.66, + "learning_rate": 6.936516300890239e-07, + "loss": 1.4053, + "step": 2458 + }, + { + "epoch": 2.66, + "learning_rate": 6.892918930365289e-07, + "loss": 1.4796, + "step": 2459 + }, + { + "epoch": 2.66, + "learning_rate": 6.84945411013186e-07, + "loss": 1.6008, + "step": 2460 + }, + { + "epoch": 2.66, + "learning_rate": 6.806121902067919e-07, + "loss": 1.4403, + "step": 2461 + }, + { + "epoch": 2.66, + "learning_rate": 6.76292236786259e-07, + "loss": 1.3376, + "step": 2462 + }, + { + "epoch": 2.67, + "learning_rate": 6.719855569016187e-07, + "loss": 1.4014, + "step": 2463 + }, + { + "epoch": 2.67, + "learning_rate": 6.676921566839977e-07, + "loss": 1.2418, + "step": 2464 + }, + { + "epoch": 2.67, + "learning_rate": 6.634120422456225e-07, + "loss": 1.3975, + "step": 2465 + }, + { + "epoch": 2.67, + "learning_rate": 6.591452196798053e-07, + "loss": 1.5146, + "step": 2466 + }, + { + "epoch": 2.67, + "learning_rate": 6.548916950609341e-07, + "loss": 1.2588, + "step": 2467 + }, + { + "epoch": 2.67, + "learning_rate": 6.506514744444658e-07, + "loss": 1.678, + "step": 2468 + }, + { + "epoch": 2.67, + "learning_rate": 6.464245638669154e-07, + "loss": 1.5591, + "step": 2469 + }, + { + "epoch": 2.67, + "learning_rate": 6.422109693458545e-07, + "loss": 1.4771, + "step": 2470 + }, + { + "epoch": 2.67, + "learning_rate": 6.380106968798927e-07, + "loss": 1.544, + "step": 2471 + }, + { + "epoch": 2.68, + "learning_rate": 6.338237524486756e-07, + "loss": 1.5229, + "step": 2472 + }, + { + "epoch": 2.68, + "learning_rate": 6.296501420128753e-07, + "loss": 1.2298, + "step": 2473 + }, + { + "epoch": 2.68, + "learning_rate": 6.254898715141788e-07, + "loss": 1.3072, + "step": 2474 + }, + { + "epoch": 2.68, + "learning_rate": 6.21342946875283e-07, + "loss": 1.2121, + "step": 2475 + }, + { + "epoch": 2.68, + "learning_rate": 6.172093739998897e-07, + "loss": 1.452, + "step": 2476 + }, + { + "epoch": 2.68, + "learning_rate": 6.13089158772685e-07, + "loss": 1.2103, + "step": 2477 + }, + { + "epoch": 2.68, + "learning_rate": 6.089823070593437e-07, + "loss": 1.5263, + "step": 2478 + }, + { + "epoch": 2.68, + "learning_rate": 6.048888247065143e-07, + "loss": 1.3849, + "step": 2479 + }, + { + "epoch": 2.68, + "learning_rate": 6.008087175418131e-07, + "loss": 1.3651, + "step": 2480 + }, + { + "epoch": 2.69, + "learning_rate": 5.967419913738126e-07, + "loss": 1.1303, + "step": 2481 + }, + { + "epoch": 2.69, + "learning_rate": 5.926886519920372e-07, + "loss": 1.4894, + "step": 2482 + }, + { + "epoch": 2.69, + "learning_rate": 5.886487051669554e-07, + "loss": 1.5404, + "step": 2483 + }, + { + "epoch": 2.69, + "learning_rate": 5.846221566499688e-07, + "loss": 1.2261, + "step": 2484 + }, + { + "epoch": 2.69, + "learning_rate": 5.80609012173401e-07, + "loss": 1.1054, + "step": 2485 + }, + { + "epoch": 2.69, + "learning_rate": 5.766092774504983e-07, + "loss": 1.3206, + "step": 2486 + }, + { + "epoch": 2.69, + "learning_rate": 5.726229581754117e-07, + "loss": 1.7983, + "step": 2487 + }, + { + "epoch": 2.69, + "learning_rate": 5.686500600232003e-07, + "loss": 1.523, + "step": 2488 + }, + { + "epoch": 2.69, + "learning_rate": 5.646905886498055e-07, + "loss": 1.2592, + "step": 2489 + }, + { + "epoch": 2.69, + "learning_rate": 5.607445496920661e-07, + "loss": 1.637, + "step": 2490 + }, + { + "epoch": 2.7, + "learning_rate": 5.568119487676904e-07, + "loss": 1.462, + "step": 2491 + }, + { + "epoch": 2.7, + "learning_rate": 5.528927914752579e-07, + "loss": 1.4331, + "step": 2492 + }, + { + "epoch": 2.7, + "learning_rate": 5.489870833942102e-07, + "loss": 1.3316, + "step": 2493 + }, + { + "epoch": 2.7, + "learning_rate": 5.450948300848379e-07, + "loss": 1.522, + "step": 2494 + }, + { + "epoch": 2.7, + "learning_rate": 5.412160370882868e-07, + "loss": 1.5579, + "step": 2495 + }, + { + "epoch": 2.7, + "learning_rate": 5.373507099265274e-07, + "loss": 1.5086, + "step": 2496 + }, + { + "epoch": 2.7, + "learning_rate": 5.3349885410237e-07, + "loss": 1.3342, + "step": 2497 + }, + { + "epoch": 2.7, + "learning_rate": 5.296604750994416e-07, + "loss": 1.2813, + "step": 2498 + }, + { + "epoch": 2.7, + "learning_rate": 5.258355783821822e-07, + "loss": 1.0772, + "step": 2499 + }, + { + "epoch": 2.71, + "learning_rate": 5.22024169395845e-07, + "loss": 1.2934, + "step": 2500 + }, + { + "epoch": 2.71, + "learning_rate": 5.182262535664706e-07, + "loss": 1.3594, + "step": 2501 + }, + { + "epoch": 2.71, + "learning_rate": 5.14441836300903e-07, + "loss": 1.4169, + "step": 2502 + }, + { + "epoch": 2.71, + "learning_rate": 5.106709229867568e-07, + "loss": 1.397, + "step": 2503 + }, + { + "epoch": 2.71, + "learning_rate": 5.069135189924312e-07, + "loss": 1.371, + "step": 2504 + }, + { + "epoch": 2.71, + "learning_rate": 5.031696296670885e-07, + "loss": 1.073, + "step": 2505 + }, + { + "epoch": 2.71, + "learning_rate": 4.994392603406507e-07, + "loss": 1.3909, + "step": 2506 + }, + { + "epoch": 2.71, + "learning_rate": 4.957224163237995e-07, + "loss": 1.5981, + "step": 2507 + }, + { + "epoch": 2.71, + "learning_rate": 4.92019102907948e-07, + "loss": 1.2035, + "step": 2508 + }, + { + "epoch": 2.72, + "learning_rate": 4.883293253652599e-07, + "loss": 1.3625, + "step": 2509 + }, + { + "epoch": 2.72, + "learning_rate": 4.846530889486222e-07, + "loss": 1.4266, + "step": 2510 + }, + { + "epoch": 2.72, + "learning_rate": 4.809903988916431e-07, + "loss": 1.4738, + "step": 2511 + }, + { + "epoch": 2.72, + "learning_rate": 4.773412604086536e-07, + "loss": 1.2482, + "step": 2512 + }, + { + "epoch": 2.72, + "learning_rate": 4.737056786946803e-07, + "loss": 1.403, + "step": 2513 + }, + { + "epoch": 2.72, + "learning_rate": 4.7008365892546314e-07, + "loss": 1.1934, + "step": 2514 + }, + { + "epoch": 2.72, + "learning_rate": 4.6647520625742184e-07, + "loss": 1.4173, + "step": 2515 + }, + { + "epoch": 2.72, + "learning_rate": 4.628803258276715e-07, + "loss": 1.4158, + "step": 2516 + }, + { + "epoch": 2.72, + "learning_rate": 4.592990227540006e-07, + "loss": 1.2161, + "step": 2517 + }, + { + "epoch": 2.73, + "learning_rate": 4.557313021348697e-07, + "loss": 1.5801, + "step": 2518 + }, + { + "epoch": 2.73, + "learning_rate": 4.521771690494048e-07, + "loss": 1.6454, + "step": 2519 + }, + { + "epoch": 2.73, + "learning_rate": 4.486366285573818e-07, + "loss": 1.2621, + "step": 2520 + }, + { + "epoch": 2.73, + "learning_rate": 4.451096856992343e-07, + "loss": 1.5782, + "step": 2521 + }, + { + "epoch": 2.73, + "learning_rate": 4.4159634549603145e-07, + "loss": 1.3241, + "step": 2522 + }, + { + "epoch": 2.73, + "learning_rate": 4.3809661294948124e-07, + "loss": 1.5076, + "step": 2523 + }, + { + "epoch": 2.73, + "learning_rate": 4.3461049304191483e-07, + "loss": 1.4416, + "step": 2524 + }, + { + "epoch": 2.73, + "learning_rate": 4.3113799073628894e-07, + "loss": 1.2328, + "step": 2525 + }, + { + "epoch": 2.73, + "learning_rate": 4.276791109761713e-07, + "loss": 1.3776, + "step": 2526 + }, + { + "epoch": 2.73, + "learning_rate": 4.2423385868573643e-07, + "loss": 1.5137, + "step": 2527 + }, + { + "epoch": 2.74, + "learning_rate": 4.208022387697586e-07, + "loss": 1.4593, + "step": 2528 + }, + { + "epoch": 2.74, + "learning_rate": 4.1738425611360435e-07, + "loss": 1.515, + "step": 2529 + }, + { + "epoch": 2.74, + "learning_rate": 4.139799155832247e-07, + "loss": 1.758, + "step": 2530 + }, + { + "epoch": 2.74, + "learning_rate": 4.105892220251517e-07, + "loss": 1.6762, + "step": 2531 + }, + { + "epoch": 2.74, + "learning_rate": 4.0721218026648633e-07, + "loss": 1.5668, + "step": 2532 + }, + { + "epoch": 2.74, + "learning_rate": 4.038487951148973e-07, + "loss": 1.3416, + "step": 2533 + }, + { + "epoch": 2.74, + "learning_rate": 4.0049907135860986e-07, + "loss": 1.146, + "step": 2534 + }, + { + "epoch": 2.74, + "learning_rate": 3.9716301376639955e-07, + "loss": 1.3017, + "step": 2535 + }, + { + "epoch": 2.74, + "learning_rate": 3.938406270875883e-07, + "loss": 1.5709, + "step": 2536 + }, + { + "epoch": 2.75, + "learning_rate": 3.905319160520349e-07, + "loss": 1.3528, + "step": 2537 + }, + { + "epoch": 2.75, + "learning_rate": 3.872368853701258e-07, + "loss": 1.6733, + "step": 2538 + }, + { + "epoch": 2.75, + "learning_rate": 3.8395553973277876e-07, + "loss": 1.1098, + "step": 2539 + }, + { + "epoch": 2.75, + "learning_rate": 3.806878838114225e-07, + "loss": 1.2154, + "step": 2540 + }, + { + "epoch": 2.75, + "learning_rate": 3.7743392225800036e-07, + "loss": 1.3542, + "step": 2541 + }, + { + "epoch": 2.75, + "learning_rate": 3.741936597049578e-07, + "loss": 1.4238, + "step": 2542 + }, + { + "epoch": 2.75, + "learning_rate": 3.709671007652393e-07, + "loss": 1.2855, + "step": 2543 + }, + { + "epoch": 2.75, + "learning_rate": 3.6775425003227725e-07, + "loss": 1.4152, + "step": 2544 + }, + { + "epoch": 2.75, + "learning_rate": 3.6455511207999504e-07, + "loss": 1.1686, + "step": 2545 + }, + { + "epoch": 2.76, + "learning_rate": 3.6136969146278953e-07, + "loss": 1.4802, + "step": 2546 + }, + { + "epoch": 2.76, + "learning_rate": 3.581979927155288e-07, + "loss": 1.4657, + "step": 2547 + }, + { + "epoch": 2.76, + "learning_rate": 3.550400203535476e-07, + "loss": 1.3599, + "step": 2548 + }, + { + "epoch": 2.76, + "learning_rate": 3.518957788726374e-07, + "loss": 1.348, + "step": 2549 + }, + { + "epoch": 2.76, + "learning_rate": 3.487652727490454e-07, + "loss": 1.685, + "step": 2550 + }, + { + "epoch": 2.76, + "learning_rate": 3.4564850643946214e-07, + "loss": 1.4447, + "step": 2551 + }, + { + "epoch": 2.76, + "learning_rate": 3.425454843810183e-07, + "loss": 1.5913, + "step": 2552 + }, + { + "epoch": 2.76, + "learning_rate": 3.394562109912769e-07, + "loss": 1.3624, + "step": 2553 + }, + { + "epoch": 2.76, + "learning_rate": 3.36380690668231e-07, + "loss": 1.2554, + "step": 2554 + }, + { + "epoch": 2.77, + "learning_rate": 3.333189277902893e-07, + "loss": 1.3403, + "step": 2555 + }, + { + "epoch": 2.77, + "learning_rate": 3.3027092671627957e-07, + "loss": 1.3742, + "step": 2556 + }, + { + "epoch": 2.77, + "learning_rate": 3.2723669178543414e-07, + "loss": 1.5359, + "step": 2557 + }, + { + "epoch": 2.77, + "learning_rate": 3.242162273173921e-07, + "loss": 1.4275, + "step": 2558 + }, + { + "epoch": 2.77, + "learning_rate": 3.2120953761218376e-07, + "loss": 1.4927, + "step": 2559 + }, + { + "epoch": 2.77, + "learning_rate": 3.182166269502307e-07, + "loss": 1.6279, + "step": 2560 + }, + { + "epoch": 2.77, + "learning_rate": 3.152374995923413e-07, + "loss": 1.4738, + "step": 2561 + }, + { + "epoch": 2.77, + "learning_rate": 3.1227215977969407e-07, + "loss": 1.6822, + "step": 2562 + }, + { + "epoch": 2.77, + "learning_rate": 3.093206117338499e-07, + "loss": 1.4295, + "step": 2563 + }, + { + "epoch": 2.77, + "learning_rate": 3.063828596567242e-07, + "loss": 1.3922, + "step": 2564 + }, + { + "epoch": 2.78, + "learning_rate": 3.034589077306005e-07, + "loss": 1.5716, + "step": 2565 + }, + { + "epoch": 2.78, + "learning_rate": 3.005487601181112e-07, + "loss": 1.3622, + "step": 2566 + }, + { + "epoch": 2.78, + "learning_rate": 2.976524209622367e-07, + "loss": 1.4171, + "step": 2567 + }, + { + "epoch": 2.78, + "learning_rate": 2.947698943863031e-07, + "loss": 1.5839, + "step": 2568 + }, + { + "epoch": 2.78, + "learning_rate": 2.9190118449396565e-07, + "loss": 1.3866, + "step": 2569 + }, + { + "epoch": 2.78, + "learning_rate": 2.8904629536921856e-07, + "loss": 1.3458, + "step": 2570 + }, + { + "epoch": 2.78, + "learning_rate": 2.8620523107637186e-07, + "loss": 1.652, + "step": 2571 + }, + { + "epoch": 2.78, + "learning_rate": 2.8337799566005907e-07, + "loss": 1.7191, + "step": 2572 + }, + { + "epoch": 2.78, + "learning_rate": 2.805645931452261e-07, + "loss": 1.5986, + "step": 2573 + }, + { + "epoch": 2.79, + "learning_rate": 2.7776502753712243e-07, + "loss": 1.5402, + "step": 2574 + }, + { + "epoch": 2.79, + "learning_rate": 2.749793028213055e-07, + "loss": 1.4443, + "step": 2575 + }, + { + "epoch": 2.79, + "learning_rate": 2.7220742296361845e-07, + "loss": 1.2336, + "step": 2576 + }, + { + "epoch": 2.79, + "learning_rate": 2.694493919102059e-07, + "loss": 1.4248, + "step": 2577 + }, + { + "epoch": 2.79, + "learning_rate": 2.667052135874881e-07, + "loss": 1.492, + "step": 2578 + }, + { + "epoch": 2.79, + "learning_rate": 2.6397489190216786e-07, + "loss": 1.4607, + "step": 2579 + }, + { + "epoch": 2.79, + "learning_rate": 2.6125843074122246e-07, + "loss": 1.3069, + "step": 2580 + }, + { + "epoch": 2.79, + "learning_rate": 2.5855583397189077e-07, + "loss": 1.2833, + "step": 2581 + }, + { + "epoch": 2.79, + "learning_rate": 2.558671054416839e-07, + "loss": 1.4302, + "step": 2582 + }, + { + "epoch": 2.8, + "learning_rate": 2.531922489783578e-07, + "loss": 1.2546, + "step": 2583 + }, + { + "epoch": 2.8, + "learning_rate": 2.505312683899297e-07, + "loss": 1.5225, + "step": 2584 + }, + { + "epoch": 2.8, + "learning_rate": 2.4788416746465813e-07, + "loss": 1.5358, + "step": 2585 + }, + { + "epoch": 2.8, + "learning_rate": 2.452509499710409e-07, + "loss": 1.1396, + "step": 2586 + }, + { + "epoch": 2.8, + "learning_rate": 2.42631619657816e-07, + "loss": 1.3756, + "step": 2587 + }, + { + "epoch": 2.8, + "learning_rate": 2.4002618025394495e-07, + "loss": 1.4283, + "step": 2588 + }, + { + "epoch": 2.8, + "learning_rate": 2.3743463546861744e-07, + "loss": 1.2582, + "step": 2589 + }, + { + "epoch": 2.8, + "learning_rate": 2.3485698899124333e-07, + "loss": 1.4219, + "step": 2590 + }, + { + "epoch": 2.8, + "learning_rate": 2.3229324449144165e-07, + "loss": 1.5266, + "step": 2591 + }, + { + "epoch": 2.81, + "learning_rate": 2.2974340561904506e-07, + "loss": 1.3222, + "step": 2592 + }, + { + "epoch": 2.81, + "learning_rate": 2.2720747600408655e-07, + "loss": 1.581, + "step": 2593 + }, + { + "epoch": 2.81, + "learning_rate": 2.2468545925680152e-07, + "loss": 1.6482, + "step": 2594 + }, + { + "epoch": 2.81, + "learning_rate": 2.2217735896761128e-07, + "loss": 1.3587, + "step": 2595 + }, + { + "epoch": 2.81, + "learning_rate": 2.1968317870713186e-07, + "loss": 1.4174, + "step": 2596 + }, + { + "epoch": 2.81, + "learning_rate": 2.1720292202616066e-07, + "loss": 1.2659, + "step": 2597 + }, + { + "epoch": 2.81, + "learning_rate": 2.14736592455671e-07, + "loss": 1.4968, + "step": 2598 + }, + { + "epoch": 2.81, + "learning_rate": 2.1228419350681206e-07, + "loss": 1.4472, + "step": 2599 + }, + { + "epoch": 2.81, + "learning_rate": 2.0984572867089658e-07, + "loss": 1.3207, + "step": 2600 + }, + { + "epoch": 2.81, + "learning_rate": 2.0742120141940548e-07, + "loss": 1.7412, + "step": 2601 + }, + { + "epoch": 2.82, + "learning_rate": 2.0501061520397547e-07, + "loss": 1.4375, + "step": 2602 + }, + { + "epoch": 2.82, + "learning_rate": 2.0261397345639366e-07, + "loss": 1.5255, + "step": 2603 + }, + { + "epoch": 2.82, + "learning_rate": 2.0023127958860078e-07, + "loss": 1.6913, + "step": 2604 + }, + { + "epoch": 2.82, + "learning_rate": 1.9786253699267567e-07, + "loss": 1.4362, + "step": 2605 + }, + { + "epoch": 2.82, + "learning_rate": 1.9550774904083747e-07, + "loss": 1.3471, + "step": 2606 + }, + { + "epoch": 2.82, + "learning_rate": 1.9316691908544127e-07, + "loss": 1.363, + "step": 2607 + }, + { + "epoch": 2.82, + "learning_rate": 1.9084005045896803e-07, + "loss": 1.6219, + "step": 2608 + }, + { + "epoch": 2.82, + "learning_rate": 1.8852714647402571e-07, + "loss": 1.548, + "step": 2609 + }, + { + "epoch": 2.82, + "learning_rate": 1.8622821042333927e-07, + "loss": 1.2939, + "step": 2610 + }, + { + "epoch": 2.83, + "learning_rate": 1.8394324557974962e-07, + "loss": 1.4058, + "step": 2611 + }, + { + "epoch": 2.83, + "learning_rate": 1.816722551962069e-07, + "loss": 1.7237, + "step": 2612 + }, + { + "epoch": 2.83, + "learning_rate": 1.7941524250576602e-07, + "loss": 1.7418, + "step": 2613 + }, + { + "epoch": 2.83, + "learning_rate": 1.7717221072158786e-07, + "loss": 1.4954, + "step": 2614 + }, + { + "epoch": 2.83, + "learning_rate": 1.7494316303692472e-07, + "loss": 1.3937, + "step": 2615 + }, + { + "epoch": 2.83, + "learning_rate": 1.727281026251204e-07, + "loss": 1.6045, + "step": 2616 + }, + { + "epoch": 2.83, + "learning_rate": 1.7052703263960912e-07, + "loss": 1.2003, + "step": 2617 + }, + { + "epoch": 2.83, + "learning_rate": 1.683399562139043e-07, + "loss": 1.571, + "step": 2618 + }, + { + "epoch": 2.83, + "learning_rate": 1.6616687646160202e-07, + "loss": 1.5339, + "step": 2619 + }, + { + "epoch": 2.84, + "learning_rate": 1.6400779647636754e-07, + "loss": 1.3924, + "step": 2620 + }, + { + "epoch": 2.84, + "learning_rate": 1.6186271933193997e-07, + "loss": 1.2853, + "step": 2621 + }, + { + "epoch": 2.84, + "learning_rate": 1.5973164808212094e-07, + "loss": 1.3534, + "step": 2622 + }, + { + "epoch": 2.84, + "learning_rate": 1.5761458576077138e-07, + "loss": 1.3648, + "step": 2623 + }, + { + "epoch": 2.84, + "learning_rate": 1.5551153538181374e-07, + "loss": 1.5253, + "step": 2624 + }, + { + "epoch": 2.84, + "learning_rate": 1.5342249993921532e-07, + "loss": 1.4273, + "step": 2625 + }, + { + "epoch": 2.84, + "learning_rate": 1.5134748240700048e-07, + "loss": 1.5722, + "step": 2626 + }, + { + "epoch": 2.84, + "learning_rate": 1.4928648573922732e-07, + "loss": 1.4696, + "step": 2627 + }, + { + "epoch": 2.84, + "learning_rate": 1.4723951287000214e-07, + "loss": 1.5012, + "step": 2628 + }, + { + "epoch": 2.85, + "learning_rate": 1.4520656671346056e-07, + "loss": 1.505, + "step": 2629 + }, + { + "epoch": 2.85, + "learning_rate": 1.4318765016377078e-07, + "loss": 1.5803, + "step": 2630 + }, + { + "epoch": 2.85, + "learning_rate": 1.4118276609513038e-07, + "loss": 1.4299, + "step": 2631 + }, + { + "epoch": 2.85, + "learning_rate": 1.391919173617562e-07, + "loss": 1.3572, + "step": 2632 + }, + { + "epoch": 2.85, + "learning_rate": 1.3721510679788774e-07, + "loss": 1.3128, + "step": 2633 + }, + { + "epoch": 2.85, + "learning_rate": 1.3525233721777498e-07, + "loss": 1.2744, + "step": 2634 + }, + { + "epoch": 2.85, + "learning_rate": 1.3330361141568383e-07, + "loss": 1.2885, + "step": 2635 + }, + { + "epoch": 2.85, + "learning_rate": 1.3136893216588175e-07, + "loss": 1.2948, + "step": 2636 + }, + { + "epoch": 2.85, + "learning_rate": 1.2944830222264115e-07, + "loss": 1.5285, + "step": 2637 + }, + { + "epoch": 2.85, + "learning_rate": 1.2754172432023703e-07, + "loss": 1.3903, + "step": 2638 + }, + { + "epoch": 2.86, + "learning_rate": 1.2564920117293266e-07, + "loss": 1.3907, + "step": 2639 + }, + { + "epoch": 2.86, + "learning_rate": 1.237707354749884e-07, + "loss": 1.3469, + "step": 2640 + }, + { + "epoch": 2.86, + "learning_rate": 1.2190632990064734e-07, + "loss": 1.4327, + "step": 2641 + }, + { + "epoch": 2.86, + "learning_rate": 1.2005598710414067e-07, + "loss": 1.1509, + "step": 2642 + }, + { + "epoch": 2.86, + "learning_rate": 1.1821970971967579e-07, + "loss": 1.2846, + "step": 2643 + }, + { + "epoch": 2.86, + "learning_rate": 1.1639750036143704e-07, + "loss": 1.5876, + "step": 2644 + }, + { + "epoch": 2.86, + "learning_rate": 1.145893616235827e-07, + "loss": 1.386, + "step": 2645 + }, + { + "epoch": 2.86, + "learning_rate": 1.1279529608023698e-07, + "loss": 1.4907, + "step": 2646 + }, + { + "epoch": 2.86, + "learning_rate": 1.1101530628549128e-07, + "loss": 1.4283, + "step": 2647 + }, + { + "epoch": 2.87, + "learning_rate": 1.0924939477339635e-07, + "loss": 1.48, + "step": 2648 + }, + { + "epoch": 2.87, + "learning_rate": 1.074975640579623e-07, + "loss": 1.4688, + "step": 2649 + }, + { + "epoch": 2.87, + "learning_rate": 1.0575981663315416e-07, + "loss": 1.3441, + "step": 2650 + }, + { + "epoch": 2.87, + "learning_rate": 1.04036154972883e-07, + "loss": 1.2524, + "step": 2651 + }, + { + "epoch": 2.87, + "learning_rate": 1.0232658153101261e-07, + "loss": 1.5244, + "step": 2652 + }, + { + "epoch": 2.87, + "learning_rate": 1.0063109874134724e-07, + "loss": 1.3212, + "step": 2653 + }, + { + "epoch": 2.87, + "learning_rate": 9.894970901763057e-08, + "loss": 1.514, + "step": 2654 + }, + { + "epoch": 2.87, + "learning_rate": 9.728241475354561e-08, + "loss": 1.1713, + "step": 2655 + }, + { + "epoch": 2.87, + "learning_rate": 9.562921832270588e-08, + "loss": 1.337, + "step": 2656 + }, + { + "epoch": 2.88, + "learning_rate": 9.399012207865765e-08, + "loss": 1.5239, + "step": 2657 + }, + { + "epoch": 2.88, + "learning_rate": 9.236512835486989e-08, + "loss": 1.5626, + "step": 2658 + }, + { + "epoch": 2.88, + "learning_rate": 9.075423946473871e-08, + "loss": 1.5068, + "step": 2659 + }, + { + "epoch": 2.88, + "learning_rate": 8.915745770157747e-08, + "loss": 1.7189, + "step": 2660 + }, + { + "epoch": 2.88, + "learning_rate": 8.757478533861663e-08, + "loss": 1.6673, + "step": 2661 + }, + { + "epoch": 2.88, + "learning_rate": 8.600622462900165e-08, + "loss": 1.3928, + "step": 2662 + }, + { + "epoch": 2.88, + "learning_rate": 8.445177780578517e-08, + "loss": 1.3171, + "step": 2663 + }, + { + "epoch": 2.88, + "learning_rate": 8.291144708193033e-08, + "loss": 1.2642, + "step": 2664 + }, + { + "epoch": 2.88, + "learning_rate": 8.138523465030191e-08, + "loss": 1.3229, + "step": 2665 + }, + { + "epoch": 2.89, + "learning_rate": 7.98731426836663e-08, + "loss": 1.3029, + "step": 2666 + }, + { + "epoch": 2.89, + "learning_rate": 7.837517333468603e-08, + "loss": 1.2242, + "step": 2667 + }, + { + "epoch": 2.89, + "learning_rate": 7.689132873592076e-08, + "loss": 1.6488, + "step": 2668 + }, + { + "epoch": 2.89, + "learning_rate": 7.542161099981849e-08, + "loss": 1.8174, + "step": 2669 + }, + { + "epoch": 2.89, + "learning_rate": 7.396602221871885e-08, + "loss": 1.3012, + "step": 2670 + }, + { + "epoch": 2.89, + "learning_rate": 7.252456446484534e-08, + "loss": 1.409, + "step": 2671 + }, + { + "epoch": 2.89, + "learning_rate": 7.109723979030536e-08, + "loss": 1.4102, + "step": 2672 + }, + { + "epoch": 2.89, + "learning_rate": 6.968405022708347e-08, + "loss": 1.3756, + "step": 2673 + }, + { + "epoch": 2.89, + "learning_rate": 6.828499778704367e-08, + "loss": 1.244, + "step": 2674 + }, + { + "epoch": 2.9, + "learning_rate": 6.690008446192276e-08, + "loss": 1.2074, + "step": 2675 + }, + { + "epoch": 2.9, + "learning_rate": 6.552931222332803e-08, + "loss": 1.4653, + "step": 2676 + }, + { + "epoch": 2.9, + "learning_rate": 6.417268302273739e-08, + "loss": 1.408, + "step": 2677 + }, + { + "epoch": 2.9, + "learning_rate": 6.283019879149144e-08, + "loss": 1.7122, + "step": 2678 + }, + { + "epoch": 2.9, + "learning_rate": 6.150186144079473e-08, + "loss": 1.217, + "step": 2679 + }, + { + "epoch": 2.9, + "learning_rate": 6.018767286171234e-08, + "loss": 1.5713, + "step": 2680 + }, + { + "epoch": 2.9, + "learning_rate": 5.888763492516436e-08, + "loss": 1.4467, + "step": 2681 + }, + { + "epoch": 2.9, + "learning_rate": 5.760174948193031e-08, + "loss": 1.7328, + "step": 2682 + }, + { + "epoch": 2.9, + "learning_rate": 5.633001836263696e-08, + "loss": 1.3985, + "step": 2683 + }, + { + "epoch": 2.9, + "learning_rate": 5.507244337776274e-08, + "loss": 1.6614, + "step": 2684 + }, + { + "epoch": 2.91, + "learning_rate": 5.382902631763331e-08, + "loss": 1.5567, + "step": 2685 + }, + { + "epoch": 2.91, + "learning_rate": 5.259976895241714e-08, + "loss": 1.4546, + "step": 2686 + }, + { + "epoch": 2.91, + "learning_rate": 5.138467303212546e-08, + "loss": 1.6378, + "step": 2687 + }, + { + "epoch": 2.91, + "learning_rate": 5.018374028660788e-08, + "loss": 1.4673, + "step": 2688 + }, + { + "epoch": 2.91, + "learning_rate": 4.899697242555346e-08, + "loss": 1.2791, + "step": 2689 + }, + { + "epoch": 2.91, + "learning_rate": 4.7824371138481815e-08, + "loss": 1.4178, + "step": 2690 + }, + { + "epoch": 2.91, + "learning_rate": 4.666593809474762e-08, + "loss": 1.4014, + "step": 2691 + }, + { + "epoch": 2.91, + "learning_rate": 4.5521674943534985e-08, + "loss": 1.1117, + "step": 2692 + }, + { + "epoch": 2.91, + "learning_rate": 4.439158331385196e-08, + "loss": 1.4885, + "step": 2693 + }, + { + "epoch": 2.92, + "learning_rate": 4.327566481453715e-08, + "loss": 1.2032, + "step": 2694 + }, + { + "epoch": 2.92, + "learning_rate": 4.2173921034246444e-08, + "loss": 1.566, + "step": 2695 + }, + { + "epoch": 2.92, + "learning_rate": 4.108635354145851e-08, + "loss": 1.3328, + "step": 2696 + }, + { + "epoch": 2.92, + "learning_rate": 4.001296388447151e-08, + "loss": 1.3768, + "step": 2697 + }, + { + "epoch": 2.92, + "learning_rate": 3.8953753591396415e-08, + "loss": 1.5365, + "step": 2698 + }, + { + "epoch": 2.92, + "learning_rate": 3.790872417016034e-08, + "loss": 1.4503, + "step": 2699 + }, + { + "epoch": 2.92, + "learning_rate": 3.68778771085021e-08, + "loss": 1.0304, + "step": 2700 + }, + { + "epoch": 2.92, + "learning_rate": 3.5861213873968904e-08, + "loss": 1.6038, + "step": 2701 + }, + { + "epoch": 2.92, + "learning_rate": 3.48587359139152e-08, + "loss": 1.601, + "step": 2702 + }, + { + "epoch": 2.93, + "learning_rate": 3.38704446555016e-08, + "loss": 1.4354, + "step": 2703 + }, + { + "epoch": 2.93, + "learning_rate": 3.289634150569376e-08, + "loss": 1.4827, + "step": 2704 + }, + { + "epoch": 2.93, + "learning_rate": 3.1936427851253503e-08, + "loss": 1.0858, + "step": 2705 + }, + { + "epoch": 2.93, + "learning_rate": 3.0990705058748794e-08, + "loss": 1.3856, + "step": 2706 + }, + { + "epoch": 2.93, + "learning_rate": 3.0059174474539324e-08, + "loss": 1.4821, + "step": 2707 + }, + { + "epoch": 2.93, + "learning_rate": 2.914183742478427e-08, + "loss": 1.5497, + "step": 2708 + }, + { + "epoch": 2.93, + "learning_rate": 2.8238695215432323e-08, + "loss": 1.5207, + "step": 2709 + }, + { + "epoch": 2.93, + "learning_rate": 2.734974913222943e-08, + "loss": 1.5781, + "step": 2710 + }, + { + "epoch": 2.93, + "learning_rate": 2.64750004407055e-08, + "loss": 1.6297, + "step": 2711 + }, + { + "epoch": 2.94, + "learning_rate": 2.5614450386182155e-08, + "loss": 1.5763, + "step": 2712 + }, + { + "epoch": 2.94, + "learning_rate": 2.4768100193768295e-08, + "loss": 1.3805, + "step": 2713 + }, + { + "epoch": 2.94, + "learning_rate": 2.3935951068353446e-08, + "loss": 1.5306, + "step": 2714 + }, + { + "epoch": 2.94, + "learning_rate": 2.3118004194614406e-08, + "loss": 1.4369, + "step": 2715 + }, + { + "epoch": 2.94, + "learning_rate": 2.2314260737006376e-08, + "loss": 1.2034, + "step": 2716 + }, + { + "epoch": 2.94, + "learning_rate": 2.152472183976406e-08, + "loss": 1.5095, + "step": 2717 + }, + { + "epoch": 2.94, + "learning_rate": 2.074938862690279e-08, + "loss": 1.4576, + "step": 2718 + }, + { + "epoch": 2.94, + "learning_rate": 1.998826220220962e-08, + "loss": 1.2408, + "step": 2719 + }, + { + "epoch": 2.94, + "learning_rate": 1.924134364925112e-08, + "loss": 1.4286, + "step": 2720 + }, + { + "epoch": 2.94, + "learning_rate": 1.850863403136449e-08, + "loss": 1.3504, + "step": 2721 + }, + { + "epoch": 2.95, + "learning_rate": 1.7790134391659775e-08, + "loss": 1.3752, + "step": 2722 + }, + { + "epoch": 2.95, + "learning_rate": 1.708584575301542e-08, + "loss": 1.4447, + "step": 2723 + }, + { + "epoch": 2.95, + "learning_rate": 1.6395769118080495e-08, + "loss": 1.2743, + "step": 2724 + }, + { + "epoch": 2.95, + "learning_rate": 1.571990546927138e-08, + "loss": 1.3814, + "step": 2725 + }, + { + "epoch": 2.95, + "learning_rate": 1.5058255768767295e-08, + "loss": 1.3547, + "step": 2726 + }, + { + "epoch": 2.95, + "learning_rate": 1.441082095851698e-08, + "loss": 1.4415, + "step": 2727 + }, + { + "epoch": 2.95, + "learning_rate": 1.3777601960229814e-08, + "loss": 1.4448, + "step": 2728 + }, + { + "epoch": 2.95, + "learning_rate": 1.3158599675374695e-08, + "loss": 1.4281, + "step": 2729 + }, + { + "epoch": 2.95, + "learning_rate": 1.2553814985186707e-08, + "loss": 1.493, + "step": 2730 + }, + { + "epoch": 2.96, + "learning_rate": 1.19632487506538e-08, + "loss": 1.4782, + "step": 2731 + }, + { + "epoch": 2.96, + "learning_rate": 1.1386901812527883e-08, + "loss": 1.348, + "step": 2732 + }, + { + "epoch": 2.96, + "learning_rate": 1.0824774991314845e-08, + "loss": 1.3377, + "step": 2733 + }, + { + "epoch": 2.96, + "learning_rate": 1.0276869087276764e-08, + "loss": 1.4975, + "step": 2734 + }, + { + "epoch": 2.96, + "learning_rate": 9.74318488042969e-09, + "loss": 1.0908, + "step": 2735 + }, + { + "epoch": 2.96, + "learning_rate": 9.223723130544759e-09, + "loss": 1.5807, + "step": 2736 + }, + { + "epoch": 2.96, + "learning_rate": 8.718484577144859e-09, + "loss": 1.3614, + "step": 2737 + }, + { + "epoch": 2.96, + "learning_rate": 8.227469939503518e-09, + "loss": 1.6107, + "step": 2738 + }, + { + "epoch": 2.96, + "learning_rate": 7.75067991664602e-09, + "loss": 1.3086, + "step": 2739 + }, + { + "epoch": 2.97, + "learning_rate": 7.288115187344957e-09, + "loss": 1.4008, + "step": 2740 + }, + { + "epoch": 2.97, + "learning_rate": 6.839776410124677e-09, + "loss": 1.4084, + "step": 2741 + }, + { + "epoch": 2.97, + "learning_rate": 6.4056642232523945e-09, + "loss": 1.5999, + "step": 2742 + }, + { + "epoch": 2.97, + "learning_rate": 5.985779244747081e-09, + "loss": 1.732, + "step": 2743 + }, + { + "epoch": 2.97, + "learning_rate": 5.5801220723683545e-09, + "loss": 1.4423, + "step": 2744 + }, + { + "epoch": 2.97, + "learning_rate": 5.1886932836253675e-09, + "loss": 1.5881, + "step": 2745 + }, + { + "epoch": 2.97, + "learning_rate": 4.811493435766812e-09, + "loss": 1.3558, + "step": 2746 + }, + { + "epoch": 2.97, + "learning_rate": 4.44852306578869e-09, + "loss": 1.3489, + "step": 2747 + }, + { + "epoch": 2.97, + "learning_rate": 4.099782690425435e-09, + "loss": 1.2391, + "step": 2748 + }, + { + "epoch": 2.98, + "learning_rate": 3.7652728061576824e-09, + "loss": 1.8044, + "step": 2749 + }, + { + "epoch": 2.98, + "learning_rate": 3.444993889202275e-09, + "loss": 1.3535, + "step": 2750 + }, + { + "epoch": 2.98, + "learning_rate": 3.1389463955200373e-09, + "loss": 1.3158, + "step": 2751 + }, + { + "epoch": 2.98, + "learning_rate": 2.8471307608102238e-09, + "loss": 1.4599, + "step": 2752 + }, + { + "epoch": 2.98, + "learning_rate": 2.5695474005116295e-09, + "loss": 1.2495, + "step": 2753 + }, + { + "epoch": 2.98, + "learning_rate": 2.3061967097992575e-09, + "loss": 1.6066, + "step": 2754 + }, + { + "epoch": 2.98, + "learning_rate": 2.057079063589873e-09, + "loss": 1.5093, + "step": 2755 + }, + { + "epoch": 2.98, + "learning_rate": 1.8221948165342285e-09, + "loss": 1.4025, + "step": 2756 + }, + { + "epoch": 2.98, + "learning_rate": 1.6015443030215073e-09, + "loss": 1.3788, + "step": 2757 + }, + { + "epoch": 2.98, + "learning_rate": 1.3951278371759913e-09, + "loss": 1.4646, + "step": 2758 + }, + { + "epoch": 2.99, + "learning_rate": 1.2029457128615029e-09, + "loss": 1.353, + "step": 2759 + }, + { + "epoch": 2.99, + "learning_rate": 1.0249982036725226e-09, + "loss": 1.4964, + "step": 2760 + }, + { + "epoch": 2.99, + "learning_rate": 8.612855629419603e-10, + "loss": 1.3455, + "step": 2761 + }, + { + "epoch": 2.99, + "learning_rate": 7.11808023735605e-10, + "loss": 1.7719, + "step": 2762 + }, + { + "epoch": 2.99, + "learning_rate": 5.765657988554551e-10, + "loss": 1.4145, + "step": 2763 + }, + { + "epoch": 2.99, + "learning_rate": 4.555590808374977e-10, + "loss": 1.4484, + "step": 2764 + }, + { + "epoch": 2.99, + "learning_rate": 3.4878804194948825e-10, + "loss": 1.3246, + "step": 2765 + }, + { + "epoch": 2.99, + "learning_rate": 2.562528341942816e-10, + "loss": 1.4381, + "step": 2766 + }, + { + "epoch": 2.99, + "learning_rate": 1.779535893076112e-10, + "loss": 1.2841, + "step": 2767 + }, + { + "epoch": 3.0, + "learning_rate": 1.1389041876030959e-10, + "loss": 1.1594, + "step": 2768 + }, + { + "epoch": 3.0, + "learning_rate": 6.406341375497782e-11, + "loss": 1.278, + "step": 2769 + }, + { + "epoch": 3.0, + "learning_rate": 2.847264522487514e-11, + "loss": 1.2287, + "step": 2770 + }, + { + "epoch": 3.0, + "learning_rate": 7.118163839470171e-12, + "loss": 1.2849, + "step": 2771 + }, + { + "epoch": 3.0, + "learning_rate": 0.0, + "loss": 1.6482, + "step": 2772 + } + ], + "logging_steps": 1.0, + "max_steps": 2772, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 3.786131489826013e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}