diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,4741 +1,10525 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 3.0, - "global_step": 786, + "epoch": 10.0, + "global_step": 1750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.0, - "learning_rate": 4.993638676844784e-05, - "loss": 3.5902, + "epoch": 0.01, + "learning_rate": 4.9971428571428576e-05, + "loss": 3.6087, "step": 1 }, { "epoch": 0.01, - "learning_rate": 4.9872773536895677e-05, - "loss": 3.2854, + "learning_rate": 4.994285714285715e-05, + "loss": 3.2923, "step": 2 }, { - "epoch": 0.01, - "learning_rate": 4.9809160305343514e-05, - "loss": 3.2426, + "epoch": 0.02, + "learning_rate": 4.9914285714285717e-05, + "loss": 3.2591, "step": 3 }, { "epoch": 0.02, - "learning_rate": 4.974554707379135e-05, - "loss": 2.9834, + "learning_rate": 4.9885714285714283e-05, + "loss": 3.039, "step": 4 }, { - "epoch": 0.02, - "learning_rate": 4.968193384223919e-05, - "loss": 2.9, + "epoch": 0.03, + "learning_rate": 4.985714285714286e-05, + "loss": 2.8404, "step": 5 }, { - "epoch": 0.02, - "learning_rate": 4.9618320610687025e-05, - "loss": 2.8669, + "epoch": 0.03, + "learning_rate": 4.982857142857143e-05, + "loss": 2.7964, "step": 6 }, { - "epoch": 0.03, - "learning_rate": 4.955470737913486e-05, - "loss": 2.6986, + "epoch": 0.04, + "learning_rate": 4.9800000000000004e-05, + "loss": 2.6194, "step": 7 }, { - "epoch": 0.03, - "learning_rate": 4.94910941475827e-05, - "loss": 2.6329, + "epoch": 0.05, + "learning_rate": 4.977142857142857e-05, + "loss": 2.5721, "step": 8 }, { - "epoch": 0.03, - "learning_rate": 4.9427480916030536e-05, - "loss": 2.5975, + "epoch": 0.05, + "learning_rate": 4.9742857142857145e-05, + "loss": 2.5876, "step": 9 }, { - "epoch": 0.04, - "learning_rate": 4.936386768447838e-05, - "loss": 2.5687, + "epoch": 0.06, + "learning_rate": 4.971428571428572e-05, + "loss": 2.4753, "step": 10 }, { - "epoch": 0.04, - "learning_rate": 4.930025445292621e-05, - "loss": 2.3833, + "epoch": 0.06, + "learning_rate": 4.9685714285714285e-05, + "loss": 2.5941, "step": 11 }, { - "epoch": 0.05, - "learning_rate": 4.923664122137405e-05, - "loss": 2.4913, + "epoch": 0.07, + "learning_rate": 4.965714285714286e-05, + "loss": 2.5473, "step": 12 }, { - "epoch": 0.05, - "learning_rate": 4.9173027989821884e-05, - "loss": 2.5164, + "epoch": 0.07, + "learning_rate": 4.962857142857143e-05, + "loss": 2.5097, "step": 13 }, { - "epoch": 0.05, - "learning_rate": 4.910941475826972e-05, - "loss": 2.3806, + "epoch": 0.08, + "learning_rate": 4.96e-05, + "loss": 2.3575, "step": 14 }, { - "epoch": 0.06, - "learning_rate": 4.904580152671756e-05, - "loss": 2.3433, + "epoch": 0.09, + "learning_rate": 4.957142857142857e-05, + "loss": 2.3955, "step": 15 }, { - "epoch": 0.06, - "learning_rate": 4.8982188295165396e-05, - "loss": 2.512, + "epoch": 0.09, + "learning_rate": 4.954285714285715e-05, + "loss": 2.2897, "step": 16 }, { - "epoch": 0.06, - "learning_rate": 4.891857506361323e-05, - "loss": 2.41, + "epoch": 0.1, + "learning_rate": 4.951428571428572e-05, + "loss": 2.3772, "step": 17 }, { - "epoch": 0.07, - "learning_rate": 4.885496183206107e-05, - "loss": 2.4081, + "epoch": 0.1, + "learning_rate": 4.9485714285714294e-05, + "loss": 2.4377, "step": 18 }, { - "epoch": 0.07, - "learning_rate": 4.879134860050891e-05, - "loss": 2.371, + "epoch": 0.11, + "learning_rate": 4.9457142857142854e-05, + "loss": 2.2162, "step": 19 }, { - "epoch": 0.08, - "learning_rate": 4.8727735368956744e-05, - "loss": 2.3742, + "epoch": 0.11, + "learning_rate": 4.942857142857143e-05, + "loss": 2.1841, "step": 20 }, { - "epoch": 0.08, - "learning_rate": 4.866412213740458e-05, - "loss": 2.2301, + "epoch": 0.12, + "learning_rate": 4.94e-05, + "loss": 2.2659, "step": 21 }, { - "epoch": 0.08, - "learning_rate": 4.860050890585242e-05, - "loss": 2.3423, + "epoch": 0.13, + "learning_rate": 4.9371428571428575e-05, + "loss": 2.2422, "step": 22 }, { - "epoch": 0.09, - "learning_rate": 4.853689567430026e-05, - "loss": 2.1729, + "epoch": 0.13, + "learning_rate": 4.934285714285715e-05, + "loss": 2.2387, "step": 23 }, { - "epoch": 0.09, - "learning_rate": 4.847328244274809e-05, - "loss": 2.2131, + "epoch": 0.14, + "learning_rate": 4.9314285714285716e-05, + "loss": 2.2341, "step": 24 }, { - "epoch": 0.1, - "learning_rate": 4.840966921119593e-05, - "loss": 2.2021, + "epoch": 0.14, + "learning_rate": 4.928571428571429e-05, + "loss": 2.1916, "step": 25 }, { - "epoch": 0.1, - "learning_rate": 4.8346055979643766e-05, - "loss": 2.3912, + "epoch": 0.15, + "learning_rate": 4.9257142857142856e-05, + "loss": 2.1618, "step": 26 }, { - "epoch": 0.1, - "learning_rate": 4.82824427480916e-05, - "loss": 2.319, + "epoch": 0.15, + "learning_rate": 4.922857142857143e-05, + "loss": 2.2922, "step": 27 }, { - "epoch": 0.11, - "learning_rate": 4.821882951653944e-05, - "loss": 2.201, + "epoch": 0.16, + "learning_rate": 4.92e-05, + "loss": 2.2532, "step": 28 }, { - "epoch": 0.11, - "learning_rate": 4.8155216284987284e-05, - "loss": 2.1006, + "epoch": 0.17, + "learning_rate": 4.917142857142858e-05, + "loss": 2.1959, "step": 29 }, { - "epoch": 0.11, - "learning_rate": 4.809160305343512e-05, - "loss": 2.1128, + "epoch": 0.17, + "learning_rate": 4.9142857142857144e-05, + "loss": 2.2305, "step": 30 }, { - "epoch": 0.12, - "learning_rate": 4.802798982188295e-05, - "loss": 2.1793, + "epoch": 0.18, + "learning_rate": 4.911428571428572e-05, + "loss": 2.2708, "step": 31 }, { - "epoch": 0.12, - "learning_rate": 4.796437659033079e-05, - "loss": 2.1679, + "epoch": 0.18, + "learning_rate": 4.908571428571429e-05, + "loss": 2.1294, "step": 32 }, { - "epoch": 0.13, - "learning_rate": 4.7900763358778626e-05, - "loss": 2.1725, + "epoch": 0.19, + "learning_rate": 4.905714285714286e-05, + "loss": 2.135, "step": 33 }, { - "epoch": 0.13, - "learning_rate": 4.783715012722646e-05, - "loss": 2.171, + "epoch": 0.19, + "learning_rate": 4.902857142857143e-05, + "loss": 2.1147, "step": 34 }, { - "epoch": 0.13, - "learning_rate": 4.7773536895674307e-05, - "loss": 2.1676, + "epoch": 0.2, + "learning_rate": 4.9e-05, + "loss": 2.164, "step": 35 }, { - "epoch": 0.14, - "learning_rate": 4.7709923664122144e-05, - "loss": 2.187, + "epoch": 0.21, + "learning_rate": 4.897142857142857e-05, + "loss": 2.1336, "step": 36 }, { - "epoch": 0.14, - "learning_rate": 4.764631043256998e-05, - "loss": 2.1405, + "epoch": 0.21, + "learning_rate": 4.8942857142857146e-05, + "loss": 2.1323, "step": 37 }, { - "epoch": 0.15, - "learning_rate": 4.758269720101781e-05, - "loss": 2.057, + "epoch": 0.22, + "learning_rate": 4.891428571428572e-05, + "loss": 1.9827, "step": 38 }, { - "epoch": 0.15, - "learning_rate": 4.751908396946565e-05, - "loss": 2.0775, + "epoch": 0.22, + "learning_rate": 4.888571428571429e-05, + "loss": 2.1043, "step": 39 }, { - "epoch": 0.15, - "learning_rate": 4.7455470737913485e-05, - "loss": 2.2024, + "epoch": 0.23, + "learning_rate": 4.885714285714286e-05, + "loss": 2.1156, "step": 40 }, { - "epoch": 0.16, - "learning_rate": 4.739185750636133e-05, - "loss": 2.2435, + "epoch": 0.23, + "learning_rate": 4.882857142857143e-05, + "loss": 2.1938, "step": 41 }, { - "epoch": 0.16, - "learning_rate": 4.7328244274809166e-05, - "loss": 2.1347, + "epoch": 0.24, + "learning_rate": 4.88e-05, + "loss": 2.0351, "step": 42 }, { - "epoch": 0.16, - "learning_rate": 4.7264631043257e-05, - "loss": 2.1104, + "epoch": 0.25, + "learning_rate": 4.8771428571428574e-05, + "loss": 1.9892, "step": 43 }, { - "epoch": 0.17, - "learning_rate": 4.720101781170484e-05, - "loss": 2.0484, + "epoch": 0.25, + "learning_rate": 4.874285714285715e-05, + "loss": 1.9887, "step": 44 }, { - "epoch": 0.17, - "learning_rate": 4.713740458015267e-05, - "loss": 2.1963, + "epoch": 0.26, + "learning_rate": 4.8714285714285714e-05, + "loss": 2.0619, "step": 45 }, { - "epoch": 0.18, - "learning_rate": 4.707379134860051e-05, - "loss": 2.1868, + "epoch": 0.26, + "learning_rate": 4.868571428571429e-05, + "loss": 1.9874, "step": 46 }, { - "epoch": 0.18, - "learning_rate": 4.7010178117048345e-05, - "loss": 2.0827, + "epoch": 0.27, + "learning_rate": 4.865714285714286e-05, + "loss": 2.0538, "step": 47 }, { - "epoch": 0.18, - "learning_rate": 4.694656488549619e-05, - "loss": 2.0513, + "epoch": 0.27, + "learning_rate": 4.862857142857143e-05, + "loss": 1.9582, "step": 48 }, { - "epoch": 0.19, - "learning_rate": 4.6882951653944026e-05, - "loss": 2.031, + "epoch": 0.28, + "learning_rate": 4.86e-05, + "loss": 1.9682, "step": 49 }, { - "epoch": 0.19, - "learning_rate": 4.681933842239186e-05, - "loss": 2.1287, + "epoch": 0.29, + "learning_rate": 4.8571428571428576e-05, + "loss": 2.0755, "step": 50 }, { - "epoch": 0.19, - "learning_rate": 4.675572519083969e-05, - "loss": 2.0014, + "epoch": 0.29, + "learning_rate": 4.854285714285714e-05, + "loss": 1.9472, "step": 51 }, { - "epoch": 0.2, - "learning_rate": 4.669211195928753e-05, - "loss": 2.1081, + "epoch": 0.3, + "learning_rate": 4.8514285714285716e-05, + "loss": 2.0844, "step": 52 }, { - "epoch": 0.2, - "learning_rate": 4.662849872773537e-05, - "loss": 2.1631, + "epoch": 0.3, + "learning_rate": 4.848571428571429e-05, + "loss": 2.0636, "step": 53 }, { - "epoch": 0.21, - "learning_rate": 4.656488549618321e-05, - "loss": 1.9476, + "epoch": 0.31, + "learning_rate": 4.8457142857142864e-05, + "loss": 2.0661, "step": 54 }, { - "epoch": 0.21, - "learning_rate": 4.650127226463105e-05, - "loss": 2.1122, + "epoch": 0.31, + "learning_rate": 4.842857142857143e-05, + "loss": 2.1533, "step": 55 }, { - "epoch": 0.21, - "learning_rate": 4.6437659033078885e-05, - "loss": 1.9889, + "epoch": 0.32, + "learning_rate": 4.8400000000000004e-05, + "loss": 1.9542, "step": 56 }, { - "epoch": 0.22, - "learning_rate": 4.637404580152672e-05, - "loss": 1.9505, + "epoch": 0.33, + "learning_rate": 4.837142857142857e-05, + "loss": 1.9903, "step": 57 }, { - "epoch": 0.22, - "learning_rate": 4.631043256997455e-05, - "loss": 1.9858, + "epoch": 0.33, + "learning_rate": 4.8342857142857145e-05, + "loss": 2.1032, "step": 58 }, { - "epoch": 0.23, - "learning_rate": 4.624681933842239e-05, - "loss": 2.0182, + "epoch": 0.34, + "learning_rate": 4.831428571428572e-05, + "loss": 1.9825, "step": 59 }, { - "epoch": 0.23, - "learning_rate": 4.618320610687023e-05, - "loss": 2.0939, + "epoch": 0.34, + "learning_rate": 4.828571428571429e-05, + "loss": 2.0152, "step": 60 }, { - "epoch": 0.23, - "learning_rate": 4.611959287531807e-05, - "loss": 2.1672, + "epoch": 0.35, + "learning_rate": 4.825714285714286e-05, + "loss": 1.9837, "step": 61 }, { - "epoch": 0.24, - "learning_rate": 4.605597964376591e-05, - "loss": 2.0219, + "epoch": 0.35, + "learning_rate": 4.8228571428571426e-05, + "loss": 1.9918, "step": 62 }, { - "epoch": 0.24, - "learning_rate": 4.5992366412213745e-05, - "loss": 2.0243, + "epoch": 0.36, + "learning_rate": 4.82e-05, + "loss": 1.9813, "step": 63 }, { - "epoch": 0.24, - "learning_rate": 4.592875318066158e-05, - "loss": 2.0021, + "epoch": 0.37, + "learning_rate": 4.817142857142857e-05, + "loss": 1.8869, "step": 64 }, { - "epoch": 0.25, - "learning_rate": 4.586513994910941e-05, - "loss": 1.8785, + "epoch": 0.37, + "learning_rate": 4.8142857142857147e-05, + "loss": 1.9366, "step": 65 }, { - "epoch": 0.25, - "learning_rate": 4.5801526717557256e-05, - "loss": 1.9561, + "epoch": 0.38, + "learning_rate": 4.811428571428572e-05, + "loss": 1.9793, "step": 66 }, { - "epoch": 0.26, - "learning_rate": 4.573791348600509e-05, - "loss": 2.0123, + "epoch": 0.38, + "learning_rate": 4.808571428571429e-05, + "loss": 1.9499, "step": 67 }, { - "epoch": 0.26, - "learning_rate": 4.567430025445293e-05, - "loss": 1.9843, + "epoch": 0.39, + "learning_rate": 4.805714285714286e-05, + "loss": 1.9999, "step": 68 }, { - "epoch": 0.26, - "learning_rate": 4.561068702290077e-05, - "loss": 1.9598, + "epoch": 0.39, + "learning_rate": 4.802857142857143e-05, + "loss": 1.9717, "step": 69 }, { - "epoch": 0.27, - "learning_rate": 4.5547073791348604e-05, - "loss": 2.021, + "epoch": 0.4, + "learning_rate": 4.8e-05, + "loss": 2.0035, "step": 70 }, { - "epoch": 0.27, - "learning_rate": 4.548346055979644e-05, - "loss": 2.0623, + "epoch": 0.41, + "learning_rate": 4.7971428571428575e-05, + "loss": 1.9975, "step": 71 }, { - "epoch": 0.27, - "learning_rate": 4.541984732824428e-05, - "loss": 1.8997, + "epoch": 0.41, + "learning_rate": 4.794285714285714e-05, + "loss": 1.9979, "step": 72 }, { - "epoch": 0.28, - "learning_rate": 4.5356234096692115e-05, - "loss": 1.9407, + "epoch": 0.42, + "learning_rate": 4.7914285714285715e-05, + "loss": 2.0238, "step": 73 }, { - "epoch": 0.28, - "learning_rate": 4.529262086513995e-05, - "loss": 1.8768, + "epoch": 0.42, + "learning_rate": 4.788571428571429e-05, + "loss": 2.0658, "step": 74 }, { - "epoch": 0.29, - "learning_rate": 4.522900763358779e-05, - "loss": 2.123, + "epoch": 0.43, + "learning_rate": 4.785714285714286e-05, + "loss": 1.9228, "step": 75 }, { - "epoch": 0.29, - "learning_rate": 4.5165394402035626e-05, - "loss": 1.9216, + "epoch": 0.43, + "learning_rate": 4.782857142857143e-05, + "loss": 2.0645, "step": 76 }, { - "epoch": 0.29, - "learning_rate": 4.5101781170483464e-05, - "loss": 1.9373, + "epoch": 0.44, + "learning_rate": 4.78e-05, + "loss": 1.9114, "step": 77 }, { - "epoch": 0.3, - "learning_rate": 4.5038167938931294e-05, - "loss": 2.0328, + "epoch": 0.45, + "learning_rate": 4.777142857142857e-05, + "loss": 2.0026, "step": 78 }, { - "epoch": 0.3, - "learning_rate": 4.497455470737914e-05, - "loss": 1.9575, + "epoch": 0.45, + "learning_rate": 4.7742857142857144e-05, + "loss": 1.8819, "step": 79 }, { - "epoch": 0.31, - "learning_rate": 4.4910941475826975e-05, - "loss": 2.0379, + "epoch": 0.46, + "learning_rate": 4.771428571428572e-05, + "loss": 1.8794, "step": 80 }, { - "epoch": 0.31, - "learning_rate": 4.484732824427481e-05, - "loss": 2.0398, + "epoch": 0.46, + "learning_rate": 4.768571428571429e-05, + "loss": 1.8938, "step": 81 }, { - "epoch": 0.31, - "learning_rate": 4.478371501272265e-05, - "loss": 2.1423, + "epoch": 0.47, + "learning_rate": 4.7657142857142865e-05, + "loss": 2.0258, "step": 82 }, { - "epoch": 0.32, - "learning_rate": 4.4720101781170486e-05, - "loss": 2.0026, + "epoch": 0.47, + "learning_rate": 4.762857142857143e-05, + "loss": 1.8302, "step": 83 }, { - "epoch": 0.32, - "learning_rate": 4.465648854961832e-05, - "loss": 1.8533, + "epoch": 0.48, + "learning_rate": 4.76e-05, + "loss": 1.9461, "step": 84 }, { - "epoch": 0.32, - "learning_rate": 4.459287531806616e-05, - "loss": 1.8805, + "epoch": 0.49, + "learning_rate": 4.757142857142857e-05, + "loss": 1.9959, "step": 85 }, { - "epoch": 0.33, - "learning_rate": 4.4529262086514e-05, - "loss": 2.1085, + "epoch": 0.49, + "learning_rate": 4.7542857142857146e-05, + "loss": 1.932, "step": 86 }, { - "epoch": 0.33, - "learning_rate": 4.4465648854961834e-05, - "loss": 2.0008, + "epoch": 0.5, + "learning_rate": 4.751428571428572e-05, + "loss": 1.8174, "step": 87 }, { - "epoch": 0.34, - "learning_rate": 4.440203562340967e-05, - "loss": 1.8727, + "epoch": 0.5, + "learning_rate": 4.7485714285714286e-05, + "loss": 1.97, "step": 88 }, { - "epoch": 0.34, - "learning_rate": 4.433842239185751e-05, - "loss": 1.9518, + "epoch": 0.51, + "learning_rate": 4.745714285714286e-05, + "loss": 1.9027, "step": 89 }, { - "epoch": 0.34, - "learning_rate": 4.4274809160305345e-05, - "loss": 2.0203, + "epoch": 0.51, + "learning_rate": 4.742857142857143e-05, + "loss": 1.9142, "step": 90 }, { - "epoch": 0.35, - "learning_rate": 4.421119592875318e-05, - "loss": 1.9664, + "epoch": 0.52, + "learning_rate": 4.74e-05, + "loss": 1.995, "step": 91 }, { - "epoch": 0.35, - "learning_rate": 4.414758269720102e-05, - "loss": 1.9706, + "epoch": 0.53, + "learning_rate": 4.7371428571428574e-05, + "loss": 1.9443, "step": 92 }, { - "epoch": 0.35, - "learning_rate": 4.408396946564886e-05, - "loss": 1.8899, + "epoch": 0.53, + "learning_rate": 4.734285714285715e-05, + "loss": 1.8668, "step": 93 }, { - "epoch": 0.36, - "learning_rate": 4.4020356234096694e-05, - "loss": 1.9836, + "epoch": 0.54, + "learning_rate": 4.7314285714285714e-05, + "loss": 1.8348, "step": 94 }, { - "epoch": 0.36, - "learning_rate": 4.395674300254453e-05, - "loss": 1.9058, + "epoch": 0.54, + "learning_rate": 4.728571428571429e-05, + "loss": 1.8173, "step": 95 }, { - "epoch": 0.37, - "learning_rate": 4.389312977099237e-05, - "loss": 1.743, + "epoch": 0.55, + "learning_rate": 4.725714285714286e-05, + "loss": 1.8596, "step": 96 }, { - "epoch": 0.37, - "learning_rate": 4.3829516539440205e-05, - "loss": 1.8766, + "epoch": 0.55, + "learning_rate": 4.7228571428571435e-05, + "loss": 1.7175, "step": 97 }, { - "epoch": 0.37, - "learning_rate": 4.376590330788805e-05, - "loss": 1.9296, + "epoch": 0.56, + "learning_rate": 4.72e-05, + "loss": 1.8843, "step": 98 }, { - "epoch": 0.38, - "learning_rate": 4.370229007633588e-05, - "loss": 1.9061, + "epoch": 0.57, + "learning_rate": 4.717142857142857e-05, + "loss": 1.8863, "step": 99 }, { - "epoch": 0.38, - "learning_rate": 4.3638676844783716e-05, - "loss": 1.8887, + "epoch": 0.57, + "learning_rate": 4.714285714285714e-05, + "loss": 1.7893, "step": 100 }, { - "epoch": 0.39, - "learning_rate": 4.357506361323155e-05, - "loss": 2.0148, + "epoch": 0.58, + "learning_rate": 4.7114285714285716e-05, + "loss": 1.852, "step": 101 }, { - "epoch": 0.39, - "learning_rate": 4.351145038167939e-05, - "loss": 1.8899, + "epoch": 0.58, + "learning_rate": 4.708571428571429e-05, + "loss": 1.8383, "step": 102 }, { - "epoch": 0.39, - "learning_rate": 4.344783715012723e-05, - "loss": 1.9485, + "epoch": 0.59, + "learning_rate": 4.7057142857142864e-05, + "loss": 1.9066, "step": 103 }, { - "epoch": 0.4, - "learning_rate": 4.3384223918575064e-05, - "loss": 1.9325, + "epoch": 0.59, + "learning_rate": 4.702857142857143e-05, + "loss": 1.9866, "step": 104 }, { - "epoch": 0.4, - "learning_rate": 4.332061068702291e-05, - "loss": 1.9845, + "epoch": 0.6, + "learning_rate": 4.7e-05, + "loss": 1.8563, "step": 105 }, { - "epoch": 0.4, - "learning_rate": 4.325699745547074e-05, - "loss": 2.0388, + "epoch": 0.61, + "learning_rate": 4.697142857142857e-05, + "loss": 1.7936, "step": 106 }, { - "epoch": 0.41, - "learning_rate": 4.3193384223918576e-05, - "loss": 1.8658, + "epoch": 0.61, + "learning_rate": 4.6942857142857145e-05, + "loss": 1.8427, "step": 107 }, { - "epoch": 0.41, - "learning_rate": 4.312977099236641e-05, - "loss": 1.9221, + "epoch": 0.62, + "learning_rate": 4.691428571428572e-05, + "loss": 1.8277, "step": 108 }, { - "epoch": 0.42, - "learning_rate": 4.306615776081425e-05, - "loss": 1.9323, + "epoch": 0.62, + "learning_rate": 4.6885714285714285e-05, + "loss": 1.8244, "step": 109 }, { - "epoch": 0.42, - "learning_rate": 4.300254452926209e-05, - "loss": 2.0252, + "epoch": 0.63, + "learning_rate": 4.685714285714286e-05, + "loss": 1.9399, "step": 110 }, { - "epoch": 0.42, - "learning_rate": 4.293893129770993e-05, - "loss": 2.0264, + "epoch": 0.63, + "learning_rate": 4.682857142857143e-05, + "loss": 1.9104, "step": 111 }, { - "epoch": 0.43, - "learning_rate": 4.287531806615776e-05, - "loss": 1.8449, + "epoch": 0.64, + "learning_rate": 4.6800000000000006e-05, + "loss": 1.7908, "step": 112 }, { - "epoch": 0.43, - "learning_rate": 4.28117048346056e-05, - "loss": 2.0663, + "epoch": 0.65, + "learning_rate": 4.677142857142857e-05, + "loss": 1.8952, "step": 113 }, { - "epoch": 0.44, - "learning_rate": 4.2748091603053435e-05, - "loss": 1.9847, + "epoch": 0.65, + "learning_rate": 4.6742857142857146e-05, + "loss": 1.8587, "step": 114 }, { - "epoch": 0.44, - "learning_rate": 4.268447837150127e-05, - "loss": 1.8867, + "epoch": 0.66, + "learning_rate": 4.671428571428571e-05, + "loss": 1.8822, "step": 115 }, { - "epoch": 0.44, - "learning_rate": 4.262086513994911e-05, - "loss": 1.8354, + "epoch": 0.66, + "learning_rate": 4.668571428571429e-05, + "loss": 1.9028, "step": 116 }, { - "epoch": 0.45, - "learning_rate": 4.255725190839695e-05, - "loss": 1.9582, + "epoch": 0.67, + "learning_rate": 4.665714285714286e-05, + "loss": 1.872, "step": 117 }, { - "epoch": 0.45, - "learning_rate": 4.249363867684479e-05, - "loss": 1.8275, + "epoch": 0.67, + "learning_rate": 4.6628571428571434e-05, + "loss": 1.8945, "step": 118 }, { - "epoch": 0.45, - "learning_rate": 4.243002544529262e-05, - "loss": 1.8622, + "epoch": 0.68, + "learning_rate": 4.660000000000001e-05, + "loss": 1.8815, "step": 119 }, { - "epoch": 0.46, - "learning_rate": 4.236641221374046e-05, - "loss": 1.8426, + "epoch": 0.69, + "learning_rate": 4.6571428571428575e-05, + "loss": 1.7866, "step": 120 }, { - "epoch": 0.46, - "learning_rate": 4.2302798982188295e-05, - "loss": 1.8311, + "epoch": 0.69, + "learning_rate": 4.654285714285714e-05, + "loss": 1.8106, "step": 121 }, { - "epoch": 0.47, - "learning_rate": 4.223918575063613e-05, - "loss": 1.911, + "epoch": 0.7, + "learning_rate": 4.6514285714285715e-05, + "loss": 1.9063, "step": 122 }, { - "epoch": 0.47, - "learning_rate": 4.2175572519083975e-05, - "loss": 1.9679, + "epoch": 0.7, + "learning_rate": 4.648571428571429e-05, + "loss": 1.8769, "step": 123 }, { - "epoch": 0.47, - "learning_rate": 4.211195928753181e-05, - "loss": 1.7676, + "epoch": 0.71, + "learning_rate": 4.645714285714286e-05, + "loss": 1.8105, "step": 124 }, { - "epoch": 0.48, - "learning_rate": 4.204834605597965e-05, - "loss": 1.8708, + "epoch": 0.71, + "learning_rate": 4.642857142857143e-05, + "loss": 1.7734, "step": 125 }, { - "epoch": 0.48, - "learning_rate": 4.198473282442748e-05, - "loss": 1.8456, + "epoch": 0.72, + "learning_rate": 4.64e-05, + "loss": 1.7694, "step": 126 }, { - "epoch": 0.48, - "learning_rate": 4.192111959287532e-05, - "loss": 1.9192, + "epoch": 0.73, + "learning_rate": 4.637142857142857e-05, + "loss": 1.8728, "step": 127 }, { - "epoch": 0.49, - "learning_rate": 4.1857506361323154e-05, - "loss": 1.9453, + "epoch": 0.73, + "learning_rate": 4.6342857142857143e-05, + "loss": 1.7541, "step": 128 }, { - "epoch": 0.49, - "learning_rate": 4.1793893129771e-05, - "loss": 1.8886, + "epoch": 0.74, + "learning_rate": 4.631428571428572e-05, + "loss": 1.8828, "step": 129 }, { - "epoch": 0.5, - "learning_rate": 4.1730279898218835e-05, - "loss": 1.8233, + "epoch": 0.74, + "learning_rate": 4.628571428571429e-05, + "loss": 1.8825, "step": 130 }, { - "epoch": 0.5, - "learning_rate": 4.166666666666667e-05, - "loss": 1.8474, + "epoch": 0.75, + "learning_rate": 4.625714285714286e-05, + "loss": 1.869, "step": 131 }, { - "epoch": 0.5, - "learning_rate": 4.160305343511451e-05, - "loss": 1.9059, + "epoch": 0.75, + "learning_rate": 4.622857142857143e-05, + "loss": 1.8532, "step": 132 }, { - "epoch": 0.51, - "learning_rate": 4.153944020356234e-05, - "loss": 1.7812, + "epoch": 0.76, + "learning_rate": 4.6200000000000005e-05, + "loss": 1.7864, "step": 133 }, { - "epoch": 0.51, - "learning_rate": 4.1475826972010176e-05, - "loss": 1.9022, + "epoch": 0.77, + "learning_rate": 4.617142857142857e-05, + "loss": 1.9118, "step": 134 }, { - "epoch": 0.52, - "learning_rate": 4.1412213740458014e-05, - "loss": 1.9114, + "epoch": 0.77, + "learning_rate": 4.6142857142857145e-05, + "loss": 1.8429, "step": 135 }, { - "epoch": 0.52, - "learning_rate": 4.134860050890586e-05, - "loss": 1.9314, + "epoch": 0.78, + "learning_rate": 4.611428571428571e-05, + "loss": 1.8928, "step": 136 }, { - "epoch": 0.52, - "learning_rate": 4.1284987277353694e-05, - "loss": 1.9968, + "epoch": 0.78, + "learning_rate": 4.6085714285714286e-05, + "loss": 1.756, "step": 137 }, { - "epoch": 0.53, - "learning_rate": 4.122137404580153e-05, - "loss": 1.8303, + "epoch": 0.79, + "learning_rate": 4.605714285714286e-05, + "loss": 1.7718, "step": 138 }, { - "epoch": 0.53, - "learning_rate": 4.115776081424936e-05, - "loss": 1.8287, + "epoch": 0.79, + "learning_rate": 4.602857142857143e-05, + "loss": 1.709, "step": 139 }, { - "epoch": 0.53, - "learning_rate": 4.10941475826972e-05, - "loss": 1.7839, + "epoch": 0.8, + "learning_rate": 4.600000000000001e-05, + "loss": 1.7784, "step": 140 }, { - "epoch": 0.54, - "learning_rate": 4.1030534351145036e-05, - "loss": 1.8441, + "epoch": 0.81, + "learning_rate": 4.5971428571428574e-05, + "loss": 1.7954, "step": 141 }, { - "epoch": 0.54, - "learning_rate": 4.096692111959288e-05, - "loss": 1.8067, + "epoch": 0.81, + "learning_rate": 4.594285714285714e-05, + "loss": 1.694, "step": 142 }, { - "epoch": 0.55, - "learning_rate": 4.090330788804072e-05, - "loss": 1.7333, + "epoch": 0.82, + "learning_rate": 4.5914285714285714e-05, + "loss": 1.7413, "step": 143 }, { - "epoch": 0.55, - "learning_rate": 4.0839694656488554e-05, - "loss": 1.8521, + "epoch": 0.82, + "learning_rate": 4.588571428571429e-05, + "loss": 1.7266, "step": 144 }, { - "epoch": 0.55, - "learning_rate": 4.077608142493639e-05, - "loss": 1.7242, + "epoch": 0.83, + "learning_rate": 4.585714285714286e-05, + "loss": 1.862, "step": 145 }, { - "epoch": 0.56, - "learning_rate": 4.071246819338422e-05, - "loss": 1.7233, + "epoch": 0.83, + "learning_rate": 4.5828571428571435e-05, + "loss": 1.767, "step": 146 }, { - "epoch": 0.56, - "learning_rate": 4.064885496183206e-05, - "loss": 1.844, + "epoch": 0.84, + "learning_rate": 4.58e-05, + "loss": 1.7439, "step": 147 }, { - "epoch": 0.56, - "learning_rate": 4.05852417302799e-05, - "loss": 1.8458, + "epoch": 0.85, + "learning_rate": 4.5771428571428576e-05, + "loss": 1.685, "step": 148 }, { - "epoch": 0.57, - "learning_rate": 4.052162849872774e-05, - "loss": 1.7438, + "epoch": 0.85, + "learning_rate": 4.574285714285714e-05, + "loss": 1.8017, "step": 149 }, { - "epoch": 0.57, - "learning_rate": 4.0458015267175576e-05, - "loss": 1.8411, + "epoch": 0.86, + "learning_rate": 4.5714285714285716e-05, + "loss": 1.7417, "step": 150 }, { - "epoch": 0.58, - "learning_rate": 4.0394402035623413e-05, - "loss": 1.8313, + "epoch": 0.86, + "learning_rate": 4.568571428571429e-05, + "loss": 1.7689, "step": 151 }, { - "epoch": 0.58, - "learning_rate": 4.033078880407125e-05, - "loss": 1.7893, + "epoch": 0.87, + "learning_rate": 4.5657142857142857e-05, + "loss": 1.7246, "step": 152 }, { - "epoch": 0.58, - "learning_rate": 4.026717557251908e-05, - "loss": 1.7745, + "epoch": 0.87, + "learning_rate": 4.562857142857143e-05, + "loss": 1.7656, "step": 153 }, { - "epoch": 0.59, - "learning_rate": 4.0203562340966925e-05, - "loss": 1.8041, + "epoch": 0.88, + "learning_rate": 4.5600000000000004e-05, + "loss": 1.8091, "step": 154 }, { - "epoch": 0.59, - "learning_rate": 4.013994910941476e-05, - "loss": 2.0707, + "epoch": 0.89, + "learning_rate": 4.557142857142858e-05, + "loss": 1.8071, "step": 155 }, { - "epoch": 0.6, - "learning_rate": 4.00763358778626e-05, - "loss": 1.8498, + "epoch": 0.89, + "learning_rate": 4.5542857142857144e-05, + "loss": 1.8548, "step": 156 }, { - "epoch": 0.6, - "learning_rate": 4.0012722646310436e-05, - "loss": 1.835, + "epoch": 0.9, + "learning_rate": 4.551428571428572e-05, + "loss": 1.7864, "step": 157 }, { - "epoch": 0.6, - "learning_rate": 3.994910941475827e-05, - "loss": 1.7922, + "epoch": 0.9, + "learning_rate": 4.5485714285714285e-05, + "loss": 1.7857, "step": 158 }, { - "epoch": 0.61, - "learning_rate": 3.988549618320611e-05, - "loss": 1.7463, + "epoch": 0.91, + "learning_rate": 4.545714285714286e-05, + "loss": 1.8296, "step": 159 }, { - "epoch": 0.61, - "learning_rate": 3.982188295165395e-05, - "loss": 1.7811, + "epoch": 0.91, + "learning_rate": 4.542857142857143e-05, + "loss": 1.7885, "step": 160 }, { - "epoch": 0.61, - "learning_rate": 3.9758269720101784e-05, - "loss": 1.7887, + "epoch": 0.92, + "learning_rate": 4.5400000000000006e-05, + "loss": 1.8123, "step": 161 }, { - "epoch": 0.62, - "learning_rate": 3.969465648854962e-05, - "loss": 1.8297, + "epoch": 0.93, + "learning_rate": 4.537142857142857e-05, + "loss": 1.782, "step": 162 }, { - "epoch": 0.62, - "learning_rate": 3.963104325699746e-05, - "loss": 1.7985, + "epoch": 0.93, + "learning_rate": 4.534285714285714e-05, + "loss": 1.753, "step": 163 }, { - "epoch": 0.63, - "learning_rate": 3.9567430025445295e-05, - "loss": 1.8592, + "epoch": 0.94, + "learning_rate": 4.531428571428571e-05, + "loss": 1.7099, "step": 164 }, { - "epoch": 0.63, - "learning_rate": 3.950381679389313e-05, - "loss": 1.8001, + "epoch": 0.94, + "learning_rate": 4.528571428571429e-05, + "loss": 1.7098, "step": 165 }, { - "epoch": 0.63, - "learning_rate": 3.944020356234097e-05, - "loss": 1.9156, + "epoch": 0.95, + "learning_rate": 4.525714285714286e-05, + "loss": 1.6973, "step": 166 }, { - "epoch": 0.64, - "learning_rate": 3.9376590330788807e-05, - "loss": 1.7389, + "epoch": 0.95, + "learning_rate": 4.5228571428571434e-05, + "loss": 1.7178, "step": 167 }, { - "epoch": 0.64, - "learning_rate": 3.9312977099236644e-05, - "loss": 1.8288, + "epoch": 0.96, + "learning_rate": 4.52e-05, + "loss": 1.7575, "step": 168 }, { - "epoch": 0.65, - "learning_rate": 3.924936386768448e-05, - "loss": 1.9269, + "epoch": 0.97, + "learning_rate": 4.5171428571428575e-05, + "loss": 1.7206, "step": 169 }, { - "epoch": 0.65, - "learning_rate": 3.918575063613232e-05, - "loss": 1.8447, + "epoch": 0.97, + "learning_rate": 4.514285714285714e-05, + "loss": 1.7937, "step": 170 }, { - "epoch": 0.65, - "learning_rate": 3.9122137404580155e-05, - "loss": 1.7465, + "epoch": 0.98, + "learning_rate": 4.5114285714285715e-05, + "loss": 1.7444, "step": 171 }, { - "epoch": 0.66, - "learning_rate": 3.905852417302799e-05, - "loss": 1.8731, + "epoch": 0.98, + "learning_rate": 4.508571428571429e-05, + "loss": 1.824, "step": 172 }, { - "epoch": 0.66, - "learning_rate": 3.899491094147583e-05, - "loss": 1.8109, + "epoch": 0.99, + "learning_rate": 4.5057142857142856e-05, + "loss": 1.7378, "step": 173 }, { - "epoch": 0.66, - "learning_rate": 3.8931297709923666e-05, - "loss": 1.8166, + "epoch": 0.99, + "learning_rate": 4.502857142857143e-05, + "loss": 1.7011, "step": 174 }, { - "epoch": 0.67, - "learning_rate": 3.88676844783715e-05, - "loss": 1.7502, + "epoch": 1.0, + "learning_rate": 4.5e-05, + "loss": 1.8994, "step": 175 }, { - "epoch": 0.67, - "learning_rate": 3.880407124681934e-05, - "loss": 1.9094, + "epoch": 1.01, + "learning_rate": 4.4971428571428576e-05, + "loss": 1.7652, "step": 176 }, { - "epoch": 0.68, - "learning_rate": 3.874045801526718e-05, - "loss": 1.8702, + "epoch": 1.01, + "learning_rate": 4.494285714285715e-05, + "loss": 1.7149, "step": 177 }, { - "epoch": 0.68, - "learning_rate": 3.8676844783715014e-05, - "loss": 1.8245, + "epoch": 1.02, + "learning_rate": 4.491428571428572e-05, + "loss": 1.8049, "step": 178 }, { - "epoch": 0.68, - "learning_rate": 3.861323155216285e-05, - "loss": 1.838, + "epoch": 1.02, + "learning_rate": 4.4885714285714284e-05, + "loss": 1.7698, "step": 179 }, { - "epoch": 0.69, - "learning_rate": 3.854961832061069e-05, - "loss": 1.7528, + "epoch": 1.03, + "learning_rate": 4.485714285714286e-05, + "loss": 1.6461, "step": 180 }, { - "epoch": 0.69, - "learning_rate": 3.8486005089058526e-05, - "loss": 1.7561, + "epoch": 1.03, + "learning_rate": 4.482857142857143e-05, + "loss": 1.7399, "step": 181 }, { - "epoch": 0.69, - "learning_rate": 3.842239185750636e-05, - "loss": 1.8, + "epoch": 1.04, + "learning_rate": 4.4800000000000005e-05, + "loss": 1.7278, "step": 182 }, { - "epoch": 0.7, - "learning_rate": 3.83587786259542e-05, - "loss": 1.9431, + "epoch": 1.05, + "learning_rate": 4.477142857142858e-05, + "loss": 1.7141, "step": 183 }, { - "epoch": 0.7, - "learning_rate": 3.829516539440204e-05, - "loss": 1.9274, + "epoch": 1.05, + "learning_rate": 4.4742857142857145e-05, + "loss": 1.7329, "step": 184 }, { - "epoch": 0.71, - "learning_rate": 3.8231552162849874e-05, - "loss": 1.786, + "epoch": 1.06, + "learning_rate": 4.471428571428571e-05, + "loss": 1.6483, "step": 185 }, { - "epoch": 0.71, - "learning_rate": 3.816793893129771e-05, - "loss": 1.7577, + "epoch": 1.06, + "learning_rate": 4.4685714285714286e-05, + "loss": 1.6479, "step": 186 }, { - "epoch": 0.71, - "learning_rate": 3.810432569974555e-05, - "loss": 1.744, + "epoch": 1.07, + "learning_rate": 4.465714285714286e-05, + "loss": 1.6389, "step": 187 }, { - "epoch": 0.72, - "learning_rate": 3.8040712468193385e-05, - "loss": 1.7136, + "epoch": 1.07, + "learning_rate": 4.462857142857143e-05, + "loss": 1.7568, "step": 188 }, { - "epoch": 0.72, - "learning_rate": 3.797709923664122e-05, - "loss": 1.7748, + "epoch": 1.08, + "learning_rate": 4.46e-05, + "loss": 1.7051, "step": 189 }, { - "epoch": 0.73, - "learning_rate": 3.791348600508906e-05, - "loss": 1.8062, + "epoch": 1.09, + "learning_rate": 4.4571428571428574e-05, + "loss": 1.6474, "step": 190 }, { - "epoch": 0.73, - "learning_rate": 3.7849872773536896e-05, - "loss": 1.9949, + "epoch": 1.09, + "learning_rate": 4.454285714285715e-05, + "loss": 1.6429, "step": 191 }, { - "epoch": 0.73, - "learning_rate": 3.778625954198473e-05, - "loss": 1.566, + "epoch": 1.1, + "learning_rate": 4.4514285714285714e-05, + "loss": 1.7265, "step": 192 }, { - "epoch": 0.74, - "learning_rate": 3.772264631043258e-05, - "loss": 1.9316, + "epoch": 1.1, + "learning_rate": 4.448571428571429e-05, + "loss": 1.75, "step": 193 }, { - "epoch": 0.74, - "learning_rate": 3.765903307888041e-05, - "loss": 1.7205, + "epoch": 1.11, + "learning_rate": 4.445714285714286e-05, + "loss": 1.6394, "step": 194 }, { - "epoch": 0.74, - "learning_rate": 3.7595419847328244e-05, - "loss": 1.9365, + "epoch": 1.11, + "learning_rate": 4.442857142857143e-05, + "loss": 1.7156, "step": 195 }, { - "epoch": 0.75, - "learning_rate": 3.753180661577608e-05, - "loss": 1.8079, + "epoch": 1.12, + "learning_rate": 4.44e-05, + "loss": 1.7182, "step": 196 }, { - "epoch": 0.75, - "learning_rate": 3.746819338422392e-05, - "loss": 1.9424, + "epoch": 1.13, + "learning_rate": 4.4371428571428575e-05, + "loss": 1.6993, "step": 197 }, { - "epoch": 0.76, - "learning_rate": 3.7404580152671756e-05, - "loss": 1.725, + "epoch": 1.13, + "learning_rate": 4.434285714285715e-05, + "loss": 1.6851, "step": 198 }, { - "epoch": 0.76, - "learning_rate": 3.73409669211196e-05, - "loss": 1.6289, + "epoch": 1.14, + "learning_rate": 4.4314285714285716e-05, + "loss": 1.6644, "step": 199 }, { - "epoch": 0.76, - "learning_rate": 3.727735368956743e-05, - "loss": 1.894, + "epoch": 1.14, + "learning_rate": 4.428571428571428e-05, + "loss": 1.6259, "step": 200 }, { - "epoch": 0.77, - "learning_rate": 3.721374045801527e-05, - "loss": 1.8859, + "epoch": 1.15, + "learning_rate": 4.4257142857142856e-05, + "loss": 1.7085, "step": 201 }, { - "epoch": 0.77, - "learning_rate": 3.7150127226463104e-05, - "loss": 1.7943, + "epoch": 1.15, + "learning_rate": 4.422857142857143e-05, + "loss": 1.634, "step": 202 }, { - "epoch": 0.77, - "learning_rate": 3.708651399491094e-05, - "loss": 1.7328, + "epoch": 1.16, + "learning_rate": 4.4200000000000004e-05, + "loss": 1.6908, "step": 203 }, { - "epoch": 0.78, - "learning_rate": 3.702290076335878e-05, - "loss": 1.9442, + "epoch": 1.17, + "learning_rate": 4.417142857142858e-05, + "loss": 1.7158, "step": 204 }, { - "epoch": 0.78, - "learning_rate": 3.695928753180662e-05, - "loss": 1.722, + "epoch": 1.17, + "learning_rate": 4.4142857142857144e-05, + "loss": 1.7171, "step": 205 }, { - "epoch": 0.79, - "learning_rate": 3.689567430025446e-05, - "loss": 1.7147, + "epoch": 1.18, + "learning_rate": 4.411428571428572e-05, + "loss": 1.6457, "step": 206 }, { - "epoch": 0.79, - "learning_rate": 3.683206106870229e-05, - "loss": 1.7762, + "epoch": 1.18, + "learning_rate": 4.4085714285714285e-05, + "loss": 1.6225, "step": 207 }, { - "epoch": 0.79, - "learning_rate": 3.6768447837150126e-05, - "loss": 1.712, + "epoch": 1.19, + "learning_rate": 4.405714285714286e-05, + "loss": 1.7145, "step": 208 }, { - "epoch": 0.8, - "learning_rate": 3.6704834605597963e-05, - "loss": 1.5977, + "epoch": 1.19, + "learning_rate": 4.402857142857143e-05, + "loss": 1.6146, "step": 209 }, { - "epoch": 0.8, - "learning_rate": 3.66412213740458e-05, - "loss": 1.8007, + "epoch": 1.2, + "learning_rate": 4.4000000000000006e-05, + "loss": 1.6774, "step": 210 }, { - "epoch": 0.81, - "learning_rate": 3.6577608142493644e-05, - "loss": 1.8174, + "epoch": 1.21, + "learning_rate": 4.397142857142857e-05, + "loss": 1.6598, "step": 211 }, { - "epoch": 0.81, - "learning_rate": 3.651399491094148e-05, - "loss": 1.6079, + "epoch": 1.21, + "learning_rate": 4.3942857142857146e-05, + "loss": 1.6714, "step": 212 }, { - "epoch": 0.81, - "learning_rate": 3.645038167938932e-05, - "loss": 1.7263, + "epoch": 1.22, + "learning_rate": 4.391428571428572e-05, + "loss": 1.7704, "step": 213 }, { - "epoch": 0.82, - "learning_rate": 3.638676844783715e-05, - "loss": 1.6333, + "epoch": 1.22, + "learning_rate": 4.388571428571429e-05, + "loss": 1.6439, "step": 214 }, { - "epoch": 0.82, - "learning_rate": 3.6323155216284986e-05, - "loss": 1.7735, + "epoch": 1.23, + "learning_rate": 4.385714285714286e-05, + "loss": 1.6367, "step": 215 }, { - "epoch": 0.82, - "learning_rate": 3.625954198473282e-05, - "loss": 1.6724, + "epoch": 1.23, + "learning_rate": 4.382857142857143e-05, + "loss": 1.6505, "step": 216 }, { - "epoch": 0.83, - "learning_rate": 3.619592875318067e-05, - "loss": 1.8875, + "epoch": 1.24, + "learning_rate": 4.38e-05, + "loss": 1.6664, "step": 217 }, { - "epoch": 0.83, - "learning_rate": 3.6132315521628504e-05, - "loss": 1.7282, + "epoch": 1.25, + "learning_rate": 4.3771428571428574e-05, + "loss": 1.5998, "step": 218 }, { - "epoch": 0.84, - "learning_rate": 3.606870229007634e-05, - "loss": 1.713, + "epoch": 1.25, + "learning_rate": 4.374285714285715e-05, + "loss": 1.698, "step": 219 }, { - "epoch": 0.84, - "learning_rate": 3.600508905852418e-05, - "loss": 1.7317, + "epoch": 1.26, + "learning_rate": 4.371428571428572e-05, + "loss": 1.664, "step": 220 }, { - "epoch": 0.84, - "learning_rate": 3.594147582697201e-05, - "loss": 1.7028, + "epoch": 1.26, + "learning_rate": 4.368571428571429e-05, + "loss": 1.6258, "step": 221 }, { - "epoch": 0.85, - "learning_rate": 3.5877862595419845e-05, - "loss": 1.637, + "epoch": 1.27, + "learning_rate": 4.3657142857142855e-05, + "loss": 1.6731, "step": 222 }, { - "epoch": 0.85, - "learning_rate": 3.581424936386768e-05, - "loss": 1.7293, + "epoch": 1.27, + "learning_rate": 4.362857142857143e-05, + "loss": 1.7655, "step": 223 }, { - "epoch": 0.85, - "learning_rate": 3.5750636132315526e-05, - "loss": 1.7743, + "epoch": 1.28, + "learning_rate": 4.36e-05, + "loss": 1.579, "step": 224 }, { - "epoch": 0.86, - "learning_rate": 3.568702290076336e-05, - "loss": 1.6725, + "epoch": 1.29, + "learning_rate": 4.3571428571428576e-05, + "loss": 1.6825, "step": 225 }, { - "epoch": 0.86, - "learning_rate": 3.56234096692112e-05, - "loss": 1.7859, + "epoch": 1.29, + "learning_rate": 4.354285714285714e-05, + "loss": 1.6713, "step": 226 }, { - "epoch": 0.87, - "learning_rate": 3.555979643765903e-05, - "loss": 1.662, + "epoch": 1.3, + "learning_rate": 4.351428571428572e-05, + "loss": 1.6229, "step": 227 }, { - "epoch": 0.87, - "learning_rate": 3.549618320610687e-05, - "loss": 1.7467, + "epoch": 1.3, + "learning_rate": 4.3485714285714284e-05, + "loss": 1.6599, "step": 228 }, { - "epoch": 0.87, - "learning_rate": 3.5432569974554705e-05, - "loss": 1.6985, + "epoch": 1.31, + "learning_rate": 4.345714285714286e-05, + "loss": 1.6295, "step": 229 }, { - "epoch": 0.88, - "learning_rate": 3.536895674300255e-05, - "loss": 1.7305, + "epoch": 1.31, + "learning_rate": 4.342857142857143e-05, + "loss": 1.6991, "step": 230 }, { - "epoch": 0.88, - "learning_rate": 3.5305343511450386e-05, - "loss": 1.8776, + "epoch": 1.32, + "learning_rate": 4.3400000000000005e-05, + "loss": 1.7486, "step": 231 }, { - "epoch": 0.89, - "learning_rate": 3.524173027989822e-05, - "loss": 1.7783, + "epoch": 1.33, + "learning_rate": 4.337142857142857e-05, + "loss": 1.6171, "step": 232 }, { - "epoch": 0.89, - "learning_rate": 3.517811704834606e-05, - "loss": 1.7381, + "epoch": 1.33, + "learning_rate": 4.3342857142857145e-05, + "loss": 1.6926, "step": 233 }, { - "epoch": 0.89, - "learning_rate": 3.511450381679389e-05, - "loss": 1.8489, + "epoch": 1.34, + "learning_rate": 4.331428571428572e-05, + "loss": 1.7428, "step": 234 }, { - "epoch": 0.9, - "learning_rate": 3.505089058524173e-05, - "loss": 1.7912, + "epoch": 1.34, + "learning_rate": 4.328571428571429e-05, + "loss": 1.6501, "step": 235 }, { - "epoch": 0.9, - "learning_rate": 3.498727735368957e-05, - "loss": 1.7976, + "epoch": 1.35, + "learning_rate": 4.325714285714286e-05, + "loss": 1.6126, "step": 236 }, { - "epoch": 0.9, - "learning_rate": 3.492366412213741e-05, - "loss": 1.7394, + "epoch": 1.35, + "learning_rate": 4.3228571428571426e-05, + "loss": 1.5454, "step": 237 }, { - "epoch": 0.91, - "learning_rate": 3.4860050890585245e-05, - "loss": 1.6584, + "epoch": 1.36, + "learning_rate": 4.32e-05, + "loss": 1.6699, "step": 238 }, { - "epoch": 0.91, - "learning_rate": 3.479643765903308e-05, - "loss": 1.8044, + "epoch": 1.37, + "learning_rate": 4.317142857142857e-05, + "loss": 1.6015, "step": 239 }, { - "epoch": 0.92, - "learning_rate": 3.473282442748092e-05, - "loss": 1.7773, + "epoch": 1.37, + "learning_rate": 4.314285714285715e-05, + "loss": 1.6008, "step": 240 }, { - "epoch": 0.92, - "learning_rate": 3.466921119592875e-05, - "loss": 1.76, + "epoch": 1.38, + "learning_rate": 4.311428571428572e-05, + "loss": 1.7071, "step": 241 }, { - "epoch": 0.92, - "learning_rate": 3.4605597964376594e-05, - "loss": 1.7517, + "epoch": 1.38, + "learning_rate": 4.308571428571429e-05, + "loss": 1.6925, "step": 242 }, { - "epoch": 0.93, - "learning_rate": 3.454198473282443e-05, - "loss": 1.7994, + "epoch": 1.39, + "learning_rate": 4.3057142857142854e-05, + "loss": 1.6395, "step": 243 }, { - "epoch": 0.93, - "learning_rate": 3.447837150127227e-05, - "loss": 1.7396, + "epoch": 1.39, + "learning_rate": 4.302857142857143e-05, + "loss": 1.7645, "step": 244 }, { - "epoch": 0.94, - "learning_rate": 3.4414758269720105e-05, - "loss": 1.7299, + "epoch": 1.4, + "learning_rate": 4.3e-05, + "loss": 1.6794, "step": 245 }, { - "epoch": 0.94, - "learning_rate": 3.435114503816794e-05, - "loss": 1.6403, + "epoch": 1.41, + "learning_rate": 4.2971428571428575e-05, + "loss": 1.6595, "step": 246 }, { - "epoch": 0.94, - "learning_rate": 3.428753180661578e-05, - "loss": 1.6074, + "epoch": 1.41, + "learning_rate": 4.294285714285715e-05, + "loss": 1.5587, "step": 247 }, { - "epoch": 0.95, - "learning_rate": 3.4223918575063616e-05, - "loss": 1.7362, + "epoch": 1.42, + "learning_rate": 4.2914285714285716e-05, + "loss": 1.6271, "step": 248 }, { - "epoch": 0.95, - "learning_rate": 3.416030534351145e-05, - "loss": 1.6809, + "epoch": 1.42, + "learning_rate": 4.288571428571429e-05, + "loss": 1.5774, "step": 249 }, { - "epoch": 0.95, - "learning_rate": 3.409669211195929e-05, - "loss": 1.7009, + "epoch": 1.43, + "learning_rate": 4.2857142857142856e-05, + "loss": 1.7164, "step": 250 }, { - "epoch": 0.96, - "learning_rate": 3.403307888040713e-05, - "loss": 1.7218, + "epoch": 1.43, + "learning_rate": 4.282857142857143e-05, + "loss": 1.6044, "step": 251 }, { - "epoch": 0.96, - "learning_rate": 3.3969465648854964e-05, - "loss": 1.7234, + "epoch": 1.44, + "learning_rate": 4.2800000000000004e-05, + "loss": 1.607, "step": 252 }, { - "epoch": 0.97, - "learning_rate": 3.39058524173028e-05, - "loss": 1.6854, + "epoch": 1.45, + "learning_rate": 4.277142857142857e-05, + "loss": 1.4833, "step": 253 }, { - "epoch": 0.97, - "learning_rate": 3.384223918575064e-05, - "loss": 1.7727, + "epoch": 1.45, + "learning_rate": 4.2742857142857144e-05, + "loss": 1.6581, "step": 254 }, { - "epoch": 0.97, - "learning_rate": 3.3778625954198475e-05, - "loss": 1.7518, + "epoch": 1.46, + "learning_rate": 4.271428571428572e-05, + "loss": 1.6506, "step": 255 }, { - "epoch": 0.98, - "learning_rate": 3.371501272264631e-05, - "loss": 1.6936, + "epoch": 1.46, + "learning_rate": 4.268571428571429e-05, + "loss": 1.6264, "step": 256 }, { - "epoch": 0.98, - "learning_rate": 3.365139949109415e-05, - "loss": 1.8428, + "epoch": 1.47, + "learning_rate": 4.265714285714286e-05, + "loss": 1.5609, "step": 257 }, { - "epoch": 0.98, - "learning_rate": 3.358778625954199e-05, - "loss": 1.725, + "epoch": 1.47, + "learning_rate": 4.262857142857143e-05, + "loss": 1.6017, "step": 258 }, { - "epoch": 0.99, - "learning_rate": 3.3524173027989824e-05, - "loss": 1.6898, + "epoch": 1.48, + "learning_rate": 4.26e-05, + "loss": 1.6361, "step": 259 }, { - "epoch": 0.99, - "learning_rate": 3.346055979643766e-05, - "loss": 1.7259, + "epoch": 1.49, + "learning_rate": 4.257142857142857e-05, + "loss": 1.6868, "step": 260 }, { - "epoch": 1.0, - "learning_rate": 3.33969465648855e-05, - "loss": 1.665, + "epoch": 1.49, + "learning_rate": 4.2542857142857146e-05, + "loss": 1.6199, "step": 261 }, { - "epoch": 1.0, - "learning_rate": 3.3333333333333335e-05, - "loss": 1.8255, + "epoch": 1.5, + "learning_rate": 4.251428571428572e-05, + "loss": 1.6722, "step": 262 }, { - "epoch": 1.0, - "learning_rate": 3.326972010178117e-05, - "loss": 1.7793, + "epoch": 1.5, + "learning_rate": 4.2485714285714286e-05, + "loss": 1.576, "step": 263 }, { - "epoch": 1.01, - "learning_rate": 3.320610687022901e-05, - "loss": 1.7146, + "epoch": 1.51, + "learning_rate": 4.245714285714285e-05, + "loss": 1.6202, "step": 264 }, { - "epoch": 1.01, - "learning_rate": 3.3142493638676846e-05, - "loss": 1.6895, + "epoch": 1.51, + "learning_rate": 4.242857142857143e-05, + "loss": 1.5588, "step": 265 }, { - "epoch": 1.02, - "learning_rate": 3.307888040712468e-05, - "loss": 1.7582, + "epoch": 1.52, + "learning_rate": 4.24e-05, + "loss": 1.6601, "step": 266 }, { - "epoch": 1.02, - "learning_rate": 3.301526717557252e-05, - "loss": 1.7078, + "epoch": 1.53, + "learning_rate": 4.2371428571428574e-05, + "loss": 1.7059, "step": 267 }, { - "epoch": 1.02, - "learning_rate": 3.295165394402036e-05, - "loss": 1.7425, + "epoch": 1.53, + "learning_rate": 4.234285714285715e-05, + "loss": 1.621, "step": 268 }, { - "epoch": 1.03, - "learning_rate": 3.2888040712468194e-05, - "loss": 1.6471, + "epoch": 1.54, + "learning_rate": 4.2314285714285715e-05, + "loss": 1.5852, "step": 269 }, { - "epoch": 1.03, - "learning_rate": 3.282442748091603e-05, - "loss": 1.6362, + "epoch": 1.54, + "learning_rate": 4.228571428571429e-05, + "loss": 1.5851, "step": 270 }, { - "epoch": 1.03, - "learning_rate": 3.276081424936387e-05, - "loss": 1.7659, + "epoch": 1.55, + "learning_rate": 4.225714285714286e-05, + "loss": 1.6144, "step": 271 }, { - "epoch": 1.04, - "learning_rate": 3.2697201017811706e-05, - "loss": 1.6603, + "epoch": 1.55, + "learning_rate": 4.222857142857143e-05, + "loss": 1.5543, "step": 272 }, { - "epoch": 1.04, - "learning_rate": 3.263358778625954e-05, - "loss": 1.7568, + "epoch": 1.56, + "learning_rate": 4.22e-05, + "loss": 1.5907, "step": 273 }, { - "epoch": 1.05, - "learning_rate": 3.256997455470738e-05, - "loss": 1.6339, + "epoch": 1.57, + "learning_rate": 4.2171428571428576e-05, + "loss": 1.6405, "step": 274 }, { - "epoch": 1.05, - "learning_rate": 3.250636132315522e-05, - "loss": 1.7437, + "epoch": 1.57, + "learning_rate": 4.214285714285714e-05, + "loss": 1.5462, "step": 275 }, { - "epoch": 1.05, - "learning_rate": 3.2442748091603054e-05, - "loss": 1.6258, + "epoch": 1.58, + "learning_rate": 4.211428571428572e-05, + "loss": 1.6405, "step": 276 }, { - "epoch": 1.06, - "learning_rate": 3.237913486005089e-05, - "loss": 1.6025, + "epoch": 1.58, + "learning_rate": 4.208571428571429e-05, + "loss": 1.7377, "step": 277 }, { - "epoch": 1.06, - "learning_rate": 3.231552162849873e-05, - "loss": 1.5755, + "epoch": 1.59, + "learning_rate": 4.2057142857142864e-05, + "loss": 1.6308, "step": 278 }, { - "epoch": 1.06, - "learning_rate": 3.2251908396946565e-05, - "loss": 1.6287, + "epoch": 1.59, + "learning_rate": 4.202857142857143e-05, + "loss": 1.564, "step": 279 }, { - "epoch": 1.07, - "learning_rate": 3.21882951653944e-05, - "loss": 1.6126, + "epoch": 1.6, + "learning_rate": 4.2e-05, + "loss": 1.7135, "step": 280 }, { - "epoch": 1.07, - "learning_rate": 3.2124681933842246e-05, - "loss": 1.7431, + "epoch": 1.61, + "learning_rate": 4.197142857142857e-05, + "loss": 1.5188, "step": 281 }, { - "epoch": 1.08, - "learning_rate": 3.2061068702290076e-05, - "loss": 1.7279, + "epoch": 1.61, + "learning_rate": 4.1942857142857145e-05, + "loss": 1.6114, "step": 282 }, { - "epoch": 1.08, - "learning_rate": 3.1997455470737913e-05, - "loss": 1.5722, + "epoch": 1.62, + "learning_rate": 4.191428571428572e-05, + "loss": 1.7466, "step": 283 }, { - "epoch": 1.08, - "learning_rate": 3.193384223918575e-05, - "loss": 1.5911, + "epoch": 1.62, + "learning_rate": 4.188571428571429e-05, + "loss": 1.5667, "step": 284 }, { - "epoch": 1.09, - "learning_rate": 3.187022900763359e-05, - "loss": 1.5626, + "epoch": 1.63, + "learning_rate": 4.185714285714286e-05, + "loss": 1.6834, "step": 285 }, { - "epoch": 1.09, - "learning_rate": 3.1806615776081425e-05, - "loss": 1.6939, + "epoch": 1.63, + "learning_rate": 4.1828571428571426e-05, + "loss": 1.5684, "step": 286 }, { - "epoch": 1.1, - "learning_rate": 3.174300254452927e-05, - "loss": 1.6078, + "epoch": 1.64, + "learning_rate": 4.18e-05, + "loss": 1.6175, "step": 287 }, { - "epoch": 1.1, - "learning_rate": 3.16793893129771e-05, - "loss": 1.8064, + "epoch": 1.65, + "learning_rate": 4.177142857142857e-05, + "loss": 1.6588, "step": 288 }, { - "epoch": 1.1, - "learning_rate": 3.1615776081424936e-05, - "loss": 1.7212, + "epoch": 1.65, + "learning_rate": 4.174285714285715e-05, + "loss": 1.6149, "step": 289 }, { - "epoch": 1.11, - "learning_rate": 3.155216284987277e-05, - "loss": 1.5587, + "epoch": 1.66, + "learning_rate": 4.1714285714285714e-05, + "loss": 1.6206, "step": 290 }, { - "epoch": 1.11, - "learning_rate": 3.148854961832061e-05, - "loss": 1.7182, + "epoch": 1.66, + "learning_rate": 4.168571428571429e-05, + "loss": 1.6661, "step": 291 }, { - "epoch": 1.11, - "learning_rate": 3.142493638676845e-05, - "loss": 1.6683, + "epoch": 1.67, + "learning_rate": 4.165714285714286e-05, + "loss": 1.6249, "step": 292 }, { - "epoch": 1.12, - "learning_rate": 3.136132315521629e-05, - "loss": 1.7011, + "epoch": 1.67, + "learning_rate": 4.162857142857143e-05, + "loss": 1.65, "step": 293 }, { - "epoch": 1.12, - "learning_rate": 3.129770992366413e-05, - "loss": 1.6802, + "epoch": 1.68, + "learning_rate": 4.16e-05, + "loss": 1.5942, "step": 294 }, { - "epoch": 1.13, - "learning_rate": 3.123409669211196e-05, - "loss": 1.6369, + "epoch": 1.69, + "learning_rate": 4.1571428571428575e-05, + "loss": 1.5924, "step": 295 }, { - "epoch": 1.13, - "learning_rate": 3.1170483460559795e-05, - "loss": 1.6679, + "epoch": 1.69, + "learning_rate": 4.154285714285714e-05, + "loss": 1.5961, "step": 296 }, { - "epoch": 1.13, - "learning_rate": 3.110687022900763e-05, - "loss": 1.645, + "epoch": 1.7, + "learning_rate": 4.1514285714285716e-05, + "loss": 1.6469, "step": 297 }, { - "epoch": 1.14, - "learning_rate": 3.104325699745547e-05, - "loss": 1.6605, + "epoch": 1.7, + "learning_rate": 4.148571428571429e-05, + "loss": 1.6676, "step": 298 }, { - "epoch": 1.14, - "learning_rate": 3.097964376590331e-05, - "loss": 1.5894, + "epoch": 1.71, + "learning_rate": 4.145714285714286e-05, + "loss": 1.5858, "step": 299 }, { - "epoch": 1.15, - "learning_rate": 3.091603053435115e-05, - "loss": 1.6941, + "epoch": 1.71, + "learning_rate": 4.1428571428571437e-05, + "loss": 1.5751, "step": 300 }, { - "epoch": 1.15, - "learning_rate": 3.085241730279899e-05, - "loss": 1.7037, + "epoch": 1.72, + "learning_rate": 4.14e-05, + "loss": 1.5768, "step": 301 }, { - "epoch": 1.15, - "learning_rate": 3.078880407124682e-05, - "loss": 1.595, + "epoch": 1.73, + "learning_rate": 4.137142857142857e-05, + "loss": 1.5679, "step": 302 }, { - "epoch": 1.16, - "learning_rate": 3.0725190839694655e-05, - "loss": 1.6429, + "epoch": 1.73, + "learning_rate": 4.1342857142857144e-05, + "loss": 1.4867, "step": 303 }, { - "epoch": 1.16, - "learning_rate": 3.066157760814249e-05, - "loss": 1.6009, + "epoch": 1.74, + "learning_rate": 4.131428571428572e-05, + "loss": 1.6305, "step": 304 }, { - "epoch": 1.16, - "learning_rate": 3.0597964376590336e-05, - "loss": 1.7152, + "epoch": 1.74, + "learning_rate": 4.128571428571429e-05, + "loss": 1.5274, "step": 305 }, { - "epoch": 1.17, - "learning_rate": 3.053435114503817e-05, - "loss": 1.7318, + "epoch": 1.75, + "learning_rate": 4.125714285714286e-05, + "loss": 1.7055, "step": 306 }, { - "epoch": 1.17, - "learning_rate": 3.047073791348601e-05, - "loss": 1.6328, + "epoch": 1.75, + "learning_rate": 4.122857142857143e-05, + "loss": 1.6375, "step": 307 }, { - "epoch": 1.18, - "learning_rate": 3.0407124681933847e-05, - "loss": 1.6322, + "epoch": 1.76, + "learning_rate": 4.12e-05, + "loss": 1.6141, "step": 308 }, { - "epoch": 1.18, - "learning_rate": 3.0343511450381677e-05, - "loss": 1.5534, + "epoch": 1.77, + "learning_rate": 4.117142857142857e-05, + "loss": 1.6588, "step": 309 }, { - "epoch": 1.18, - "learning_rate": 3.0279898218829518e-05, - "loss": 1.6483, + "epoch": 1.77, + "learning_rate": 4.1142857142857146e-05, + "loss": 1.5327, "step": 310 }, { - "epoch": 1.19, - "learning_rate": 3.0216284987277355e-05, - "loss": 1.5785, + "epoch": 1.78, + "learning_rate": 4.111428571428572e-05, + "loss": 1.522, "step": 311 }, { - "epoch": 1.19, - "learning_rate": 3.0152671755725192e-05, - "loss": 1.7712, + "epoch": 1.78, + "learning_rate": 4.1085714285714286e-05, + "loss": 1.525, "step": 312 }, { - "epoch": 1.19, - "learning_rate": 3.008905852417303e-05, - "loss": 1.5643, + "epoch": 1.79, + "learning_rate": 4.105714285714286e-05, + "loss": 1.6038, "step": 313 }, { - "epoch": 1.2, - "learning_rate": 3.002544529262087e-05, - "loss": 1.6814, + "epoch": 1.79, + "learning_rate": 4.1028571428571434e-05, + "loss": 1.6005, "step": 314 }, { - "epoch": 1.2, - "learning_rate": 2.9961832061068706e-05, - "loss": 1.5747, + "epoch": 1.8, + "learning_rate": 4.1e-05, + "loss": 1.685, "step": 315 }, { - "epoch": 1.21, - "learning_rate": 2.989821882951654e-05, - "loss": 1.6542, + "epoch": 1.81, + "learning_rate": 4.0971428571428574e-05, + "loss": 1.5959, "step": 316 }, { - "epoch": 1.21, - "learning_rate": 2.9834605597964377e-05, - "loss": 1.5891, + "epoch": 1.81, + "learning_rate": 4.094285714285714e-05, + "loss": 1.6811, "step": 317 }, { - "epoch": 1.21, - "learning_rate": 2.9770992366412214e-05, - "loss": 1.66, + "epoch": 1.82, + "learning_rate": 4.0914285714285715e-05, + "loss": 1.6708, "step": 318 }, { - "epoch": 1.22, - "learning_rate": 2.970737913486005e-05, - "loss": 1.7994, + "epoch": 1.82, + "learning_rate": 4.088571428571429e-05, + "loss": 1.5821, "step": 319 }, { - "epoch": 1.22, - "learning_rate": 2.9643765903307892e-05, - "loss": 1.6422, + "epoch": 1.83, + "learning_rate": 4.085714285714286e-05, + "loss": 1.6497, "step": 320 }, { - "epoch": 1.23, - "learning_rate": 2.958015267175573e-05, - "loss": 1.6212, + "epoch": 1.83, + "learning_rate": 4.0828571428571436e-05, + "loss": 1.5811, "step": 321 }, { - "epoch": 1.23, - "learning_rate": 2.9516539440203562e-05, - "loss": 1.5684, + "epoch": 1.84, + "learning_rate": 4.08e-05, + "loss": 1.5892, "step": 322 }, { - "epoch": 1.23, - "learning_rate": 2.94529262086514e-05, - "loss": 1.5803, + "epoch": 1.85, + "learning_rate": 4.077142857142857e-05, + "loss": 1.5138, "step": 323 }, { - "epoch": 1.24, - "learning_rate": 2.9389312977099237e-05, - "loss": 1.677, + "epoch": 1.85, + "learning_rate": 4.074285714285714e-05, + "loss": 1.5428, "step": 324 }, { - "epoch": 1.24, - "learning_rate": 2.9325699745547074e-05, - "loss": 1.6106, + "epoch": 1.86, + "learning_rate": 4.0714285714285717e-05, + "loss": 1.6193, "step": 325 }, { - "epoch": 1.24, - "learning_rate": 2.9262086513994914e-05, - "loss": 1.6198, + "epoch": 1.86, + "learning_rate": 4.068571428571429e-05, + "loss": 1.5663, "step": 326 }, { - "epoch": 1.25, - "learning_rate": 2.919847328244275e-05, - "loss": 1.575, + "epoch": 1.87, + "learning_rate": 4.065714285714286e-05, + "loss": 1.6173, "step": 327 }, { - "epoch": 1.25, - "learning_rate": 2.9134860050890588e-05, - "loss": 1.6581, + "epoch": 1.87, + "learning_rate": 4.062857142857143e-05, + "loss": 1.5654, "step": 328 }, { - "epoch": 1.26, - "learning_rate": 2.9071246819338422e-05, - "loss": 1.655, + "epoch": 1.88, + "learning_rate": 4.0600000000000004e-05, + "loss": 1.5059, "step": 329 }, { - "epoch": 1.26, - "learning_rate": 2.900763358778626e-05, - "loss": 1.58, + "epoch": 1.89, + "learning_rate": 4.057142857142857e-05, + "loss": 1.5655, "step": 330 }, { - "epoch": 1.26, - "learning_rate": 2.8944020356234096e-05, - "loss": 1.6716, + "epoch": 1.89, + "learning_rate": 4.0542857142857145e-05, + "loss": 1.6775, "step": 331 }, { - "epoch": 1.27, - "learning_rate": 2.8880407124681937e-05, - "loss": 1.6456, + "epoch": 1.9, + "learning_rate": 4.051428571428572e-05, + "loss": 1.6367, "step": 332 }, { - "epoch": 1.27, - "learning_rate": 2.8816793893129774e-05, - "loss": 1.6167, + "epoch": 1.9, + "learning_rate": 4.0485714285714285e-05, + "loss": 1.6234, "step": 333 }, { - "epoch": 1.27, - "learning_rate": 2.875318066157761e-05, - "loss": 1.7806, + "epoch": 1.91, + "learning_rate": 4.045714285714286e-05, + "loss": 1.6336, "step": 334 }, { - "epoch": 1.28, - "learning_rate": 2.8689567430025448e-05, - "loss": 1.5511, + "epoch": 1.91, + "learning_rate": 4.042857142857143e-05, + "loss": 1.5717, "step": 335 }, { - "epoch": 1.28, - "learning_rate": 2.862595419847328e-05, - "loss": 1.5907, + "epoch": 1.92, + "learning_rate": 4.0400000000000006e-05, + "loss": 1.6782, "step": 336 }, { - "epoch": 1.29, - "learning_rate": 2.856234096692112e-05, - "loss": 1.6821, + "epoch": 1.93, + "learning_rate": 4.037142857142857e-05, + "loss": 1.4754, "step": 337 }, { - "epoch": 1.29, - "learning_rate": 2.849872773536896e-05, - "loss": 1.6402, + "epoch": 1.93, + "learning_rate": 4.034285714285715e-05, + "loss": 1.626, "step": 338 }, { - "epoch": 1.29, - "learning_rate": 2.8435114503816796e-05, - "loss": 1.5662, + "epoch": 1.94, + "learning_rate": 4.0314285714285714e-05, + "loss": 1.5332, "step": 339 }, { - "epoch": 1.3, - "learning_rate": 2.8371501272264633e-05, - "loss": 1.6689, + "epoch": 1.94, + "learning_rate": 4.028571428571429e-05, + "loss": 1.4984, "step": 340 }, { - "epoch": 1.3, - "learning_rate": 2.830788804071247e-05, - "loss": 1.5855, + "epoch": 1.95, + "learning_rate": 4.025714285714286e-05, + "loss": 1.5877, "step": 341 }, { - "epoch": 1.31, - "learning_rate": 2.824427480916031e-05, - "loss": 1.6353, + "epoch": 1.95, + "learning_rate": 4.0228571428571434e-05, + "loss": 1.6498, "step": 342 }, { - "epoch": 1.31, - "learning_rate": 2.818066157760814e-05, - "loss": 1.6703, + "epoch": 1.96, + "learning_rate": 4.02e-05, + "loss": 1.6192, "step": 343 }, { - "epoch": 1.31, - "learning_rate": 2.8117048346055978e-05, - "loss": 1.6515, + "epoch": 1.97, + "learning_rate": 4.017142857142857e-05, + "loss": 1.5261, "step": 344 }, { - "epoch": 1.32, - "learning_rate": 2.805343511450382e-05, - "loss": 1.6773, + "epoch": 1.97, + "learning_rate": 4.014285714285714e-05, + "loss": 1.6036, "step": 345 }, { - "epoch": 1.32, - "learning_rate": 2.7989821882951656e-05, - "loss": 1.7637, + "epoch": 1.98, + "learning_rate": 4.0114285714285715e-05, + "loss": 1.6491, "step": 346 }, { - "epoch": 1.32, - "learning_rate": 2.7926208651399493e-05, - "loss": 1.6192, + "epoch": 1.98, + "learning_rate": 4.008571428571429e-05, + "loss": 1.5306, "step": 347 }, { - "epoch": 1.33, - "learning_rate": 2.7862595419847333e-05, - "loss": 1.6012, + "epoch": 1.99, + "learning_rate": 4.005714285714286e-05, + "loss": 1.5968, "step": 348 }, { - "epoch": 1.33, - "learning_rate": 2.7798982188295163e-05, - "loss": 1.6582, + "epoch": 1.99, + "learning_rate": 4.002857142857143e-05, + "loss": 1.6654, "step": 349 }, { - "epoch": 1.34, - "learning_rate": 2.7735368956743e-05, - "loss": 1.7287, + "epoch": 2.0, + "learning_rate": 4e-05, + "loss": 1.6264, "step": 350 }, { - "epoch": 1.34, - "learning_rate": 2.767175572519084e-05, - "loss": 1.7468, + "epoch": 2.01, + "learning_rate": 3.997142857142857e-05, + "loss": 1.5485, "step": 351 }, { - "epoch": 1.34, - "learning_rate": 2.7608142493638678e-05, - "loss": 1.5716, + "epoch": 2.01, + "learning_rate": 3.9942857142857144e-05, + "loss": 1.5662, "step": 352 }, { - "epoch": 1.35, - "learning_rate": 2.7544529262086515e-05, - "loss": 1.6535, + "epoch": 2.02, + "learning_rate": 3.991428571428572e-05, + "loss": 1.5641, "step": 353 }, { - "epoch": 1.35, - "learning_rate": 2.7480916030534355e-05, - "loss": 1.479, + "epoch": 2.02, + "learning_rate": 3.9885714285714284e-05, + "loss": 1.6021, "step": 354 }, { - "epoch": 1.35, - "learning_rate": 2.7417302798982193e-05, - "loss": 1.5744, + "epoch": 2.03, + "learning_rate": 3.985714285714286e-05, + "loss": 1.5712, "step": 355 }, { - "epoch": 1.36, - "learning_rate": 2.7353689567430023e-05, - "loss": 1.6393, + "epoch": 2.03, + "learning_rate": 3.982857142857143e-05, + "loss": 1.4838, "step": 356 }, { - "epoch": 1.36, - "learning_rate": 2.7290076335877863e-05, - "loss": 1.5814, + "epoch": 2.04, + "learning_rate": 3.9800000000000005e-05, + "loss": 1.5519, "step": 357 }, { - "epoch": 1.37, - "learning_rate": 2.72264631043257e-05, - "loss": 1.6316, + "epoch": 2.05, + "learning_rate": 3.977142857142857e-05, + "loss": 1.4563, "step": 358 }, { - "epoch": 1.37, - "learning_rate": 2.7162849872773537e-05, - "loss": 1.6321, + "epoch": 2.05, + "learning_rate": 3.9742857142857146e-05, + "loss": 1.4443, "step": 359 }, { - "epoch": 1.37, - "learning_rate": 2.7099236641221375e-05, - "loss": 1.6176, + "epoch": 2.06, + "learning_rate": 3.971428571428571e-05, + "loss": 1.5643, "step": 360 }, { - "epoch": 1.38, - "learning_rate": 2.7035623409669215e-05, - "loss": 1.6889, + "epoch": 2.06, + "learning_rate": 3.9685714285714286e-05, + "loss": 1.5135, "step": 361 }, { - "epoch": 1.38, - "learning_rate": 2.6972010178117052e-05, - "loss": 1.6337, + "epoch": 2.07, + "learning_rate": 3.965714285714286e-05, + "loss": 1.4882, "step": 362 }, { - "epoch": 1.39, - "learning_rate": 2.6908396946564886e-05, - "loss": 1.6375, + "epoch": 2.07, + "learning_rate": 3.9628571428571433e-05, + "loss": 1.4846, "step": 363 }, { - "epoch": 1.39, - "learning_rate": 2.6844783715012723e-05, - "loss": 1.6314, + "epoch": 2.08, + "learning_rate": 3.960000000000001e-05, + "loss": 1.4128, "step": 364 }, { - "epoch": 1.39, - "learning_rate": 2.678117048346056e-05, - "loss": 1.73, + "epoch": 2.09, + "learning_rate": 3.9571428571428574e-05, + "loss": 1.5847, "step": 365 }, { - "epoch": 1.4, - "learning_rate": 2.6717557251908397e-05, - "loss": 1.676, + "epoch": 2.09, + "learning_rate": 3.954285714285714e-05, + "loss": 1.5023, "step": 366 }, { - "epoch": 1.4, - "learning_rate": 2.6653944020356237e-05, - "loss": 1.6735, + "epoch": 2.1, + "learning_rate": 3.9514285714285714e-05, + "loss": 1.4424, "step": 367 }, { - "epoch": 1.4, - "learning_rate": 2.6590330788804074e-05, - "loss": 1.6559, + "epoch": 2.1, + "learning_rate": 3.948571428571429e-05, + "loss": 1.5342, "step": 368 }, { - "epoch": 1.41, - "learning_rate": 2.652671755725191e-05, - "loss": 1.5847, + "epoch": 2.11, + "learning_rate": 3.945714285714286e-05, + "loss": 1.5231, "step": 369 }, { - "epoch": 1.41, - "learning_rate": 2.6463104325699745e-05, - "loss": 1.4857, + "epoch": 2.11, + "learning_rate": 3.942857142857143e-05, + "loss": 1.4948, "step": 370 }, { - "epoch": 1.42, - "learning_rate": 2.6399491094147582e-05, - "loss": 1.5846, + "epoch": 2.12, + "learning_rate": 3.94e-05, + "loss": 1.5297, "step": 371 }, { - "epoch": 1.42, - "learning_rate": 2.633587786259542e-05, - "loss": 1.6077, + "epoch": 2.13, + "learning_rate": 3.9371428571428576e-05, + "loss": 1.5705, "step": 372 }, { - "epoch": 1.42, - "learning_rate": 2.627226463104326e-05, - "loss": 1.5612, + "epoch": 2.13, + "learning_rate": 3.934285714285714e-05, + "loss": 1.5808, "step": 373 }, { - "epoch": 1.43, - "learning_rate": 2.6208651399491097e-05, - "loss": 1.7214, + "epoch": 2.14, + "learning_rate": 3.9314285714285716e-05, + "loss": 1.6704, "step": 374 }, { - "epoch": 1.43, - "learning_rate": 2.6145038167938934e-05, - "loss": 1.6248, + "epoch": 2.14, + "learning_rate": 3.928571428571429e-05, + "loss": 1.5372, "step": 375 }, { - "epoch": 1.44, - "learning_rate": 2.608142493638677e-05, - "loss": 1.5545, + "epoch": 2.15, + "learning_rate": 3.925714285714286e-05, + "loss": 1.548, "step": 376 }, { - "epoch": 1.44, - "learning_rate": 2.6017811704834605e-05, - "loss": 1.5943, + "epoch": 2.15, + "learning_rate": 3.922857142857143e-05, + "loss": 1.4958, "step": 377 }, { - "epoch": 1.44, - "learning_rate": 2.5954198473282442e-05, - "loss": 1.5038, + "epoch": 2.16, + "learning_rate": 3.9200000000000004e-05, + "loss": 1.4967, "step": 378 }, { - "epoch": 1.45, - "learning_rate": 2.5890585241730282e-05, - "loss": 1.4453, + "epoch": 2.17, + "learning_rate": 3.917142857142858e-05, + "loss": 1.4774, "step": 379 }, { - "epoch": 1.45, - "learning_rate": 2.582697201017812e-05, - "loss": 1.6681, + "epoch": 2.17, + "learning_rate": 3.9142857142857145e-05, + "loss": 1.5052, "step": 380 }, { - "epoch": 1.45, - "learning_rate": 2.5763358778625956e-05, - "loss": 1.5682, + "epoch": 2.18, + "learning_rate": 3.911428571428571e-05, + "loss": 1.4988, "step": 381 }, { - "epoch": 1.46, - "learning_rate": 2.5699745547073793e-05, - "loss": 1.6644, + "epoch": 2.18, + "learning_rate": 3.9085714285714285e-05, + "loss": 1.507, "step": 382 }, { - "epoch": 1.46, - "learning_rate": 2.5636132315521627e-05, - "loss": 1.565, + "epoch": 2.19, + "learning_rate": 3.905714285714286e-05, + "loss": 1.439, "step": 383 }, { - "epoch": 1.47, - "learning_rate": 2.5572519083969464e-05, - "loss": 1.656, + "epoch": 2.19, + "learning_rate": 3.902857142857143e-05, + "loss": 1.4346, "step": 384 }, { - "epoch": 1.47, - "learning_rate": 2.5508905852417305e-05, - "loss": 1.496, + "epoch": 2.2, + "learning_rate": 3.9000000000000006e-05, + "loss": 1.5832, "step": 385 }, { - "epoch": 1.47, - "learning_rate": 2.5445292620865142e-05, - "loss": 1.6039, + "epoch": 2.21, + "learning_rate": 3.897142857142857e-05, + "loss": 1.5271, "step": 386 }, { - "epoch": 1.48, - "learning_rate": 2.538167938931298e-05, - "loss": 1.5849, + "epoch": 2.21, + "learning_rate": 3.894285714285714e-05, + "loss": 1.553, "step": 387 }, { - "epoch": 1.48, - "learning_rate": 2.5318066157760816e-05, - "loss": 1.6688, + "epoch": 2.22, + "learning_rate": 3.8914285714285713e-05, + "loss": 1.5303, "step": 388 }, { - "epoch": 1.48, - "learning_rate": 2.5254452926208656e-05, - "loss": 1.6819, + "epoch": 2.22, + "learning_rate": 3.888571428571429e-05, + "loss": 1.4648, "step": 389 }, { - "epoch": 1.49, - "learning_rate": 2.5190839694656487e-05, - "loss": 1.6531, + "epoch": 2.23, + "learning_rate": 3.885714285714286e-05, + "loss": 1.5702, "step": 390 }, { - "epoch": 1.49, - "learning_rate": 2.5127226463104324e-05, - "loss": 1.6093, + "epoch": 2.23, + "learning_rate": 3.882857142857143e-05, + "loss": 1.527, "step": 391 }, { - "epoch": 1.5, - "learning_rate": 2.5063613231552164e-05, - "loss": 1.594, + "epoch": 2.24, + "learning_rate": 3.88e-05, + "loss": 1.5519, "step": 392 }, { - "epoch": 1.5, - "learning_rate": 2.5e-05, - "loss": 1.6348, + "epoch": 2.25, + "learning_rate": 3.8771428571428575e-05, + "loss": 1.4864, "step": 393 }, { - "epoch": 1.5, - "learning_rate": 2.4936386768447838e-05, - "loss": 1.5857, + "epoch": 2.25, + "learning_rate": 3.874285714285715e-05, + "loss": 1.4479, "step": 394 }, { - "epoch": 1.51, - "learning_rate": 2.4872773536895675e-05, - "loss": 1.5962, + "epoch": 2.26, + "learning_rate": 3.8714285714285715e-05, + "loss": 1.4971, "step": 395 }, { - "epoch": 1.51, - "learning_rate": 2.4809160305343512e-05, - "loss": 1.5256, + "epoch": 2.26, + "learning_rate": 3.868571428571429e-05, + "loss": 1.51, "step": 396 }, { - "epoch": 1.52, - "learning_rate": 2.474554707379135e-05, - "loss": 1.5459, + "epoch": 2.27, + "learning_rate": 3.8657142857142856e-05, + "loss": 1.5306, "step": 397 }, { - "epoch": 1.52, - "learning_rate": 2.468193384223919e-05, - "loss": 1.7054, + "epoch": 2.27, + "learning_rate": 3.862857142857143e-05, + "loss": 1.471, "step": 398 }, { - "epoch": 1.52, - "learning_rate": 2.4618320610687024e-05, - "loss": 1.5398, + "epoch": 2.28, + "learning_rate": 3.86e-05, + "loss": 1.5155, "step": 399 }, { - "epoch": 1.53, - "learning_rate": 2.455470737913486e-05, - "loss": 1.7594, + "epoch": 2.29, + "learning_rate": 3.857142857142858e-05, + "loss": 1.533, "step": 400 }, { - "epoch": 1.53, - "learning_rate": 2.4491094147582698e-05, - "loss": 1.614, + "epoch": 2.29, + "learning_rate": 3.854285714285715e-05, + "loss": 1.5073, "step": 401 }, { - "epoch": 1.53, - "learning_rate": 2.4427480916030535e-05, - "loss": 1.5603, + "epoch": 2.3, + "learning_rate": 3.851428571428571e-05, + "loss": 1.5348, "step": 402 }, { - "epoch": 1.54, - "learning_rate": 2.4363867684478372e-05, - "loss": 1.6489, + "epoch": 2.3, + "learning_rate": 3.8485714285714284e-05, + "loss": 1.4625, "step": 403 }, { - "epoch": 1.54, - "learning_rate": 2.430025445292621e-05, - "loss": 1.5972, + "epoch": 2.31, + "learning_rate": 3.845714285714286e-05, + "loss": 1.5146, "step": 404 }, { - "epoch": 1.55, - "learning_rate": 2.4236641221374046e-05, - "loss": 1.523, + "epoch": 2.31, + "learning_rate": 3.842857142857143e-05, + "loss": 1.4724, "step": 405 }, { - "epoch": 1.55, - "learning_rate": 2.4173027989821883e-05, - "loss": 1.6404, + "epoch": 2.32, + "learning_rate": 3.8400000000000005e-05, + "loss": 1.4763, "step": 406 }, { - "epoch": 1.55, - "learning_rate": 2.410941475826972e-05, - "loss": 1.514, + "epoch": 2.33, + "learning_rate": 3.837142857142857e-05, + "loss": 1.4617, "step": 407 }, { - "epoch": 1.56, - "learning_rate": 2.404580152671756e-05, - "loss": 1.5987, + "epoch": 2.33, + "learning_rate": 3.8342857142857146e-05, + "loss": 1.5511, "step": 408 }, { - "epoch": 1.56, - "learning_rate": 2.3982188295165394e-05, - "loss": 1.5628, + "epoch": 2.34, + "learning_rate": 3.831428571428571e-05, + "loss": 1.6107, "step": 409 }, { - "epoch": 1.56, - "learning_rate": 2.391857506361323e-05, - "loss": 1.5771, + "epoch": 2.34, + "learning_rate": 3.8285714285714286e-05, + "loss": 1.5942, "step": 410 }, { - "epoch": 1.57, - "learning_rate": 2.3854961832061072e-05, - "loss": 1.6674, + "epoch": 2.35, + "learning_rate": 3.825714285714286e-05, + "loss": 1.4694, "step": 411 }, { - "epoch": 1.57, - "learning_rate": 2.3791348600508906e-05, - "loss": 1.4884, + "epoch": 2.35, + "learning_rate": 3.822857142857143e-05, + "loss": 1.5044, "step": 412 }, { - "epoch": 1.58, - "learning_rate": 2.3727735368956743e-05, - "loss": 1.6313, + "epoch": 2.36, + "learning_rate": 3.82e-05, + "loss": 1.4824, "step": 413 }, { - "epoch": 1.58, - "learning_rate": 2.3664122137404583e-05, - "loss": 1.6976, + "epoch": 2.37, + "learning_rate": 3.8171428571428574e-05, + "loss": 1.6118, "step": 414 }, { - "epoch": 1.58, - "learning_rate": 2.360050890585242e-05, - "loss": 1.7927, + "epoch": 2.37, + "learning_rate": 3.814285714285715e-05, + "loss": 1.5563, "step": 415 }, { - "epoch": 1.59, - "learning_rate": 2.3536895674300254e-05, - "loss": 1.6724, + "epoch": 2.38, + "learning_rate": 3.8114285714285714e-05, + "loss": 1.4661, "step": 416 }, { - "epoch": 1.59, - "learning_rate": 2.3473282442748094e-05, - "loss": 1.5105, + "epoch": 2.38, + "learning_rate": 3.808571428571429e-05, + "loss": 1.5446, "step": 417 }, { - "epoch": 1.6, - "learning_rate": 2.340966921119593e-05, - "loss": 1.5567, + "epoch": 2.39, + "learning_rate": 3.8057142857142855e-05, + "loss": 1.5212, "step": 418 }, { - "epoch": 1.6, - "learning_rate": 2.3346055979643765e-05, - "loss": 1.7265, + "epoch": 2.39, + "learning_rate": 3.802857142857143e-05, + "loss": 1.4784, "step": 419 }, { - "epoch": 1.6, - "learning_rate": 2.3282442748091605e-05, - "loss": 1.5087, + "epoch": 2.4, + "learning_rate": 3.8e-05, + "loss": 1.5487, "step": 420 }, { - "epoch": 1.61, - "learning_rate": 2.3218829516539443e-05, - "loss": 1.5541, + "epoch": 2.41, + "learning_rate": 3.7971428571428576e-05, + "loss": 1.5192, "step": 421 }, { - "epoch": 1.61, - "learning_rate": 2.3155216284987276e-05, - "loss": 1.6069, + "epoch": 2.41, + "learning_rate": 3.794285714285715e-05, + "loss": 1.5653, "step": 422 }, { - "epoch": 1.61, - "learning_rate": 2.3091603053435117e-05, - "loss": 1.722, + "epoch": 2.42, + "learning_rate": 3.7914285714285716e-05, + "loss": 1.5503, "step": 423 }, { - "epoch": 1.62, - "learning_rate": 2.3027989821882954e-05, - "loss": 1.6451, + "epoch": 2.42, + "learning_rate": 3.788571428571428e-05, + "loss": 1.4077, "step": 424 }, { - "epoch": 1.62, - "learning_rate": 2.296437659033079e-05, - "loss": 1.6769, + "epoch": 2.43, + "learning_rate": 3.785714285714286e-05, + "loss": 1.5807, "step": 425 }, { - "epoch": 1.63, - "learning_rate": 2.2900763358778628e-05, - "loss": 1.5947, + "epoch": 2.43, + "learning_rate": 3.782857142857143e-05, + "loss": 1.5496, "step": 426 }, { - "epoch": 1.63, - "learning_rate": 2.2837150127226465e-05, - "loss": 1.6694, + "epoch": 2.44, + "learning_rate": 3.7800000000000004e-05, + "loss": 1.4505, "step": 427 }, { - "epoch": 1.63, - "learning_rate": 2.2773536895674302e-05, - "loss": 1.6272, + "epoch": 2.45, + "learning_rate": 3.777142857142858e-05, + "loss": 1.313, "step": 428 }, { - "epoch": 1.64, - "learning_rate": 2.270992366412214e-05, - "loss": 1.4854, + "epoch": 2.45, + "learning_rate": 3.7742857142857145e-05, + "loss": 1.4256, "step": 429 }, { - "epoch": 1.64, - "learning_rate": 2.2646310432569976e-05, - "loss": 1.6514, + "epoch": 2.46, + "learning_rate": 3.771428571428572e-05, + "loss": 1.5583, "step": 430 }, { - "epoch": 1.65, - "learning_rate": 2.2582697201017813e-05, - "loss": 1.5813, + "epoch": 2.46, + "learning_rate": 3.7685714285714285e-05, + "loss": 1.5243, "step": 431 }, { - "epoch": 1.65, - "learning_rate": 2.2519083969465647e-05, - "loss": 1.6264, + "epoch": 2.47, + "learning_rate": 3.765714285714286e-05, + "loss": 1.4804, "step": 432 }, { - "epoch": 1.65, - "learning_rate": 2.2455470737913487e-05, - "loss": 1.6744, + "epoch": 2.47, + "learning_rate": 3.762857142857143e-05, + "loss": 1.5216, "step": 433 }, { - "epoch": 1.66, - "learning_rate": 2.2391857506361324e-05, - "loss": 1.5913, + "epoch": 2.48, + "learning_rate": 3.76e-05, + "loss": 1.4271, "step": 434 }, { - "epoch": 1.66, - "learning_rate": 2.232824427480916e-05, - "loss": 1.5928, + "epoch": 2.49, + "learning_rate": 3.757142857142857e-05, + "loss": 1.4489, "step": 435 }, { - "epoch": 1.66, - "learning_rate": 2.2264631043257e-05, - "loss": 1.676, + "epoch": 2.49, + "learning_rate": 3.7542857142857146e-05, + "loss": 1.4317, "step": 436 }, { - "epoch": 1.67, - "learning_rate": 2.2201017811704836e-05, - "loss": 1.5805, + "epoch": 2.5, + "learning_rate": 3.751428571428572e-05, + "loss": 1.439, "step": 437 }, { - "epoch": 1.67, - "learning_rate": 2.2137404580152673e-05, - "loss": 1.7112, + "epoch": 2.5, + "learning_rate": 3.748571428571429e-05, + "loss": 1.4071, "step": 438 }, { - "epoch": 1.68, - "learning_rate": 2.207379134860051e-05, - "loss": 1.6407, + "epoch": 2.51, + "learning_rate": 3.745714285714286e-05, + "loss": 1.6372, "step": 439 }, { - "epoch": 1.68, - "learning_rate": 2.2010178117048347e-05, - "loss": 1.5911, + "epoch": 2.51, + "learning_rate": 3.742857142857143e-05, + "loss": 1.5466, "step": 440 }, { - "epoch": 1.68, - "learning_rate": 2.1946564885496184e-05, - "loss": 1.6219, + "epoch": 2.52, + "learning_rate": 3.74e-05, + "loss": 1.4779, "step": 441 }, { - "epoch": 1.69, - "learning_rate": 2.1882951653944024e-05, - "loss": 1.5206, + "epoch": 2.53, + "learning_rate": 3.7371428571428575e-05, + "loss": 1.5009, "step": 442 }, { - "epoch": 1.69, - "learning_rate": 2.1819338422391858e-05, - "loss": 1.5171, + "epoch": 2.53, + "learning_rate": 3.734285714285715e-05, + "loss": 1.4252, "step": 443 }, { - "epoch": 1.69, - "learning_rate": 2.1755725190839695e-05, - "loss": 1.6333, + "epoch": 2.54, + "learning_rate": 3.7314285714285715e-05, + "loss": 1.5151, "step": 444 }, { - "epoch": 1.7, - "learning_rate": 2.1692111959287532e-05, - "loss": 1.6168, + "epoch": 2.54, + "learning_rate": 3.728571428571428e-05, + "loss": 1.5126, "step": 445 }, { - "epoch": 1.7, - "learning_rate": 2.162849872773537e-05, - "loss": 1.6254, + "epoch": 2.55, + "learning_rate": 3.7257142857142856e-05, + "loss": 1.4436, "step": 446 }, { - "epoch": 1.71, - "learning_rate": 2.1564885496183206e-05, - "loss": 1.7057, + "epoch": 2.55, + "learning_rate": 3.722857142857143e-05, + "loss": 1.4778, "step": 447 }, { - "epoch": 1.71, - "learning_rate": 2.1501272264631043e-05, - "loss": 1.5022, + "epoch": 2.56, + "learning_rate": 3.72e-05, + "loss": 1.5047, "step": 448 }, { - "epoch": 1.71, - "learning_rate": 2.143765903307888e-05, - "loss": 1.5533, + "epoch": 2.57, + "learning_rate": 3.717142857142858e-05, + "loss": 1.4456, "step": 449 }, { - "epoch": 1.72, - "learning_rate": 2.1374045801526718e-05, - "loss": 1.5795, + "epoch": 2.57, + "learning_rate": 3.7142857142857143e-05, + "loss": 1.4651, "step": 450 }, { - "epoch": 1.72, - "learning_rate": 2.1310432569974555e-05, - "loss": 1.5262, + "epoch": 2.58, + "learning_rate": 3.711428571428572e-05, + "loss": 1.5173, "step": 451 }, { - "epoch": 1.73, - "learning_rate": 2.1246819338422395e-05, - "loss": 1.5867, + "epoch": 2.58, + "learning_rate": 3.7085714285714284e-05, + "loss": 1.4479, "step": 452 }, { - "epoch": 1.73, - "learning_rate": 2.118320610687023e-05, - "loss": 1.4787, + "epoch": 2.59, + "learning_rate": 3.705714285714286e-05, + "loss": 1.5072, "step": 453 }, { - "epoch": 1.73, - "learning_rate": 2.1119592875318066e-05, - "loss": 1.4586, + "epoch": 2.59, + "learning_rate": 3.702857142857143e-05, + "loss": 1.4199, "step": 454 }, { - "epoch": 1.74, - "learning_rate": 2.1055979643765906e-05, - "loss": 1.6172, + "epoch": 2.6, + "learning_rate": 3.7e-05, + "loss": 1.528, "step": 455 }, { - "epoch": 1.74, - "learning_rate": 2.099236641221374e-05, - "loss": 1.6178, + "epoch": 2.61, + "learning_rate": 3.697142857142857e-05, + "loss": 1.4877, "step": 456 }, { - "epoch": 1.74, - "learning_rate": 2.0928753180661577e-05, - "loss": 1.4876, + "epoch": 2.61, + "learning_rate": 3.6942857142857145e-05, + "loss": 1.4451, "step": 457 }, { - "epoch": 1.75, - "learning_rate": 2.0865139949109417e-05, - "loss": 1.5535, + "epoch": 2.62, + "learning_rate": 3.691428571428572e-05, + "loss": 1.5055, "step": 458 }, { - "epoch": 1.75, - "learning_rate": 2.0801526717557255e-05, - "loss": 1.6825, + "epoch": 2.62, + "learning_rate": 3.688571428571429e-05, + "loss": 1.6091, "step": 459 }, { - "epoch": 1.76, - "learning_rate": 2.0737913486005088e-05, - "loss": 1.6678, + "epoch": 2.63, + "learning_rate": 3.685714285714286e-05, + "loss": 1.4793, "step": 460 }, { - "epoch": 1.76, - "learning_rate": 2.067430025445293e-05, - "loss": 1.581, + "epoch": 2.63, + "learning_rate": 3.6828571428571426e-05, + "loss": 1.5033, "step": 461 }, { - "epoch": 1.76, - "learning_rate": 2.0610687022900766e-05, - "loss": 1.6045, + "epoch": 2.64, + "learning_rate": 3.68e-05, + "loss": 1.4965, "step": 462 }, { - "epoch": 1.77, - "learning_rate": 2.05470737913486e-05, - "loss": 1.6811, + "epoch": 2.65, + "learning_rate": 3.6771428571428574e-05, + "loss": 1.4552, "step": 463 }, { - "epoch": 1.77, - "learning_rate": 2.048346055979644e-05, - "loss": 1.5485, + "epoch": 2.65, + "learning_rate": 3.674285714285715e-05, + "loss": 1.5252, "step": 464 }, { - "epoch": 1.77, - "learning_rate": 2.0419847328244277e-05, - "loss": 1.4733, + "epoch": 2.66, + "learning_rate": 3.671428571428572e-05, + "loss": 1.5216, "step": 465 }, { - "epoch": 1.78, - "learning_rate": 2.035623409669211e-05, - "loss": 1.5043, + "epoch": 2.66, + "learning_rate": 3.668571428571429e-05, + "loss": 1.4452, "step": 466 }, { - "epoch": 1.78, - "learning_rate": 2.029262086513995e-05, - "loss": 1.4733, + "epoch": 2.67, + "learning_rate": 3.6657142857142855e-05, + "loss": 1.4532, "step": 467 }, { - "epoch": 1.79, - "learning_rate": 2.0229007633587788e-05, - "loss": 1.5753, + "epoch": 2.67, + "learning_rate": 3.662857142857143e-05, + "loss": 1.5612, "step": 468 }, { - "epoch": 1.79, - "learning_rate": 2.0165394402035625e-05, - "loss": 1.5792, + "epoch": 2.68, + "learning_rate": 3.66e-05, + "loss": 1.5364, "step": 469 }, { - "epoch": 1.79, - "learning_rate": 2.0101781170483462e-05, - "loss": 1.5859, + "epoch": 2.69, + "learning_rate": 3.6571428571428576e-05, + "loss": 1.4591, "step": 470 }, { - "epoch": 1.8, - "learning_rate": 2.00381679389313e-05, - "loss": 1.6503, + "epoch": 2.69, + "learning_rate": 3.654285714285714e-05, + "loss": 1.5026, "step": 471 }, { - "epoch": 1.8, - "learning_rate": 1.9974554707379136e-05, - "loss": 1.6675, + "epoch": 2.7, + "learning_rate": 3.6514285714285716e-05, + "loss": 1.5257, "step": 472 }, { - "epoch": 1.81, - "learning_rate": 1.9910941475826974e-05, - "loss": 1.6658, + "epoch": 2.7, + "learning_rate": 3.648571428571429e-05, + "loss": 1.3648, "step": 473 }, { - "epoch": 1.81, - "learning_rate": 1.984732824427481e-05, - "loss": 1.589, + "epoch": 2.71, + "learning_rate": 3.6457142857142857e-05, + "loss": 1.5092, "step": 474 }, { - "epoch": 1.81, - "learning_rate": 1.9783715012722648e-05, - "loss": 1.6497, + "epoch": 2.71, + "learning_rate": 3.642857142857143e-05, + "loss": 1.4937, "step": 475 }, { - "epoch": 1.82, - "learning_rate": 1.9720101781170485e-05, - "loss": 1.622, + "epoch": 2.72, + "learning_rate": 3.6400000000000004e-05, + "loss": 1.4092, "step": 476 }, { - "epoch": 1.82, - "learning_rate": 1.9656488549618322e-05, - "loss": 1.6244, + "epoch": 2.73, + "learning_rate": 3.637142857142857e-05, + "loss": 1.4851, "step": 477 }, { - "epoch": 1.82, - "learning_rate": 1.959287531806616e-05, - "loss": 1.6003, + "epoch": 2.73, + "learning_rate": 3.6342857142857144e-05, + "loss": 1.4616, "step": 478 }, { - "epoch": 1.83, - "learning_rate": 1.9529262086513996e-05, - "loss": 1.6027, + "epoch": 2.74, + "learning_rate": 3.631428571428572e-05, + "loss": 1.5117, "step": 479 }, { - "epoch": 1.83, - "learning_rate": 1.9465648854961833e-05, - "loss": 1.6063, + "epoch": 2.74, + "learning_rate": 3.628571428571429e-05, + "loss": 1.5603, "step": 480 }, { - "epoch": 1.84, - "learning_rate": 1.940203562340967e-05, - "loss": 1.5458, + "epoch": 2.75, + "learning_rate": 3.625714285714286e-05, + "loss": 1.4146, "step": 481 }, { - "epoch": 1.84, - "learning_rate": 1.9338422391857507e-05, - "loss": 1.5434, + "epoch": 2.75, + "learning_rate": 3.6228571428571425e-05, + "loss": 1.4809, "step": 482 }, { - "epoch": 1.84, - "learning_rate": 1.9274809160305344e-05, - "loss": 1.5534, + "epoch": 2.76, + "learning_rate": 3.62e-05, + "loss": 1.4996, "step": 483 }, { - "epoch": 1.85, - "learning_rate": 1.921119592875318e-05, - "loss": 1.4813, + "epoch": 2.77, + "learning_rate": 3.617142857142857e-05, + "loss": 1.4392, "step": 484 }, { - "epoch": 1.85, - "learning_rate": 1.914758269720102e-05, - "loss": 1.5318, + "epoch": 2.77, + "learning_rate": 3.6142857142857146e-05, + "loss": 1.4901, "step": 485 }, { - "epoch": 1.85, - "learning_rate": 1.9083969465648855e-05, - "loss": 1.5503, + "epoch": 2.78, + "learning_rate": 3.611428571428572e-05, + "loss": 1.5351, "step": 486 }, { - "epoch": 1.86, - "learning_rate": 1.9020356234096693e-05, - "loss": 1.6099, + "epoch": 2.78, + "learning_rate": 3.608571428571429e-05, + "loss": 1.4808, "step": 487 }, { - "epoch": 1.86, - "learning_rate": 1.895674300254453e-05, - "loss": 1.5531, + "epoch": 2.79, + "learning_rate": 3.605714285714286e-05, + "loss": 1.5067, "step": 488 }, { - "epoch": 1.87, - "learning_rate": 1.8893129770992367e-05, - "loss": 1.5838, + "epoch": 2.79, + "learning_rate": 3.602857142857143e-05, + "loss": 1.488, "step": 489 }, { - "epoch": 1.87, - "learning_rate": 1.8829516539440204e-05, - "loss": 1.5685, + "epoch": 2.8, + "learning_rate": 3.6e-05, + "loss": 1.5572, "step": 490 }, { - "epoch": 1.87, - "learning_rate": 1.876590330788804e-05, - "loss": 1.5997, + "epoch": 2.81, + "learning_rate": 3.5971428571428575e-05, + "loss": 1.467, "step": 491 }, { - "epoch": 1.88, - "learning_rate": 1.8702290076335878e-05, - "loss": 1.5092, + "epoch": 2.81, + "learning_rate": 3.594285714285714e-05, + "loss": 1.4395, "step": 492 }, { - "epoch": 1.88, - "learning_rate": 1.8638676844783715e-05, - "loss": 1.4721, + "epoch": 2.82, + "learning_rate": 3.5914285714285715e-05, + "loss": 1.4298, "step": 493 }, { - "epoch": 1.89, - "learning_rate": 1.8575063613231552e-05, - "loss": 1.5468, + "epoch": 2.82, + "learning_rate": 3.588571428571429e-05, + "loss": 1.4397, "step": 494 }, { - "epoch": 1.89, - "learning_rate": 1.851145038167939e-05, - "loss": 1.6105, + "epoch": 2.83, + "learning_rate": 3.585714285714286e-05, + "loss": 1.4718, "step": 495 }, { - "epoch": 1.89, - "learning_rate": 1.844783715012723e-05, - "loss": 1.6517, + "epoch": 2.83, + "learning_rate": 3.582857142857143e-05, + "loss": 1.4222, "step": 496 }, { - "epoch": 1.9, - "learning_rate": 1.8384223918575063e-05, - "loss": 1.5767, + "epoch": 2.84, + "learning_rate": 3.58e-05, + "loss": 1.419, "step": 497 }, { - "epoch": 1.9, - "learning_rate": 1.83206106870229e-05, - "loss": 1.6751, + "epoch": 2.85, + "learning_rate": 3.577142857142857e-05, + "loss": 1.3971, "step": 498 }, { - "epoch": 1.9, - "learning_rate": 1.825699745547074e-05, - "loss": 1.6003, + "epoch": 2.85, + "learning_rate": 3.574285714285714e-05, + "loss": 1.4151, "step": 499 }, { - "epoch": 1.91, - "learning_rate": 1.8193384223918574e-05, - "loss": 1.5979, + "epoch": 2.86, + "learning_rate": 3.571428571428572e-05, + "loss": 1.4268, "step": 500 }, { - "epoch": 1.91, - "learning_rate": 1.812977099236641e-05, - "loss": 1.6409, + "epoch": 2.86, + "learning_rate": 3.568571428571429e-05, + "loss": 1.494, "step": 501 }, { - "epoch": 1.92, - "learning_rate": 1.8066157760814252e-05, - "loss": 1.5534, + "epoch": 2.87, + "learning_rate": 3.5657142857142864e-05, + "loss": 1.457, "step": 502 }, { - "epoch": 1.92, - "learning_rate": 1.800254452926209e-05, - "loss": 1.717, + "epoch": 2.87, + "learning_rate": 3.562857142857143e-05, + "loss": 1.5671, "step": 503 }, { - "epoch": 1.92, - "learning_rate": 1.7938931297709923e-05, - "loss": 1.458, + "epoch": 2.88, + "learning_rate": 3.56e-05, + "loss": 1.5733, "step": 504 }, { - "epoch": 1.93, - "learning_rate": 1.7875318066157763e-05, - "loss": 1.5411, + "epoch": 2.89, + "learning_rate": 3.557142857142857e-05, + "loss": 1.4372, "step": 505 }, { - "epoch": 1.93, - "learning_rate": 1.78117048346056e-05, - "loss": 1.626, + "epoch": 2.89, + "learning_rate": 3.5542857142857145e-05, + "loss": 1.5491, "step": 506 }, { - "epoch": 1.94, - "learning_rate": 1.7748091603053434e-05, - "loss": 1.5568, + "epoch": 2.9, + "learning_rate": 3.551428571428572e-05, + "loss": 1.4364, "step": 507 }, { - "epoch": 1.94, - "learning_rate": 1.7684478371501274e-05, - "loss": 1.5211, + "epoch": 2.9, + "learning_rate": 3.5485714285714286e-05, + "loss": 1.5118, "step": 508 }, { - "epoch": 1.94, - "learning_rate": 1.762086513994911e-05, - "loss": 1.5756, + "epoch": 2.91, + "learning_rate": 3.545714285714286e-05, + "loss": 1.3832, "step": 509 }, { - "epoch": 1.95, - "learning_rate": 1.7557251908396945e-05, - "loss": 1.4589, + "epoch": 2.91, + "learning_rate": 3.5428571428571426e-05, + "loss": 1.4804, "step": 510 }, { - "epoch": 1.95, - "learning_rate": 1.7493638676844786e-05, - "loss": 1.5143, + "epoch": 2.92, + "learning_rate": 3.54e-05, + "loss": 1.4572, "step": 511 }, { - "epoch": 1.95, - "learning_rate": 1.7430025445292623e-05, - "loss": 1.6454, + "epoch": 2.93, + "learning_rate": 3.5371428571428574e-05, + "loss": 1.4794, "step": 512 }, { - "epoch": 1.96, - "learning_rate": 1.736641221374046e-05, - "loss": 1.6771, + "epoch": 2.93, + "learning_rate": 3.534285714285715e-05, + "loss": 1.4656, "step": 513 }, { - "epoch": 1.96, - "learning_rate": 1.7302798982188297e-05, - "loss": 1.604, + "epoch": 2.94, + "learning_rate": 3.5314285714285714e-05, + "loss": 1.4926, "step": 514 }, { - "epoch": 1.97, - "learning_rate": 1.7239185750636134e-05, - "loss": 1.4414, + "epoch": 2.94, + "learning_rate": 3.528571428571429e-05, + "loss": 1.528, "step": 515 }, { - "epoch": 1.97, - "learning_rate": 1.717557251908397e-05, - "loss": 1.5744, + "epoch": 2.95, + "learning_rate": 3.525714285714286e-05, + "loss": 1.5246, "step": 516 }, { - "epoch": 1.97, - "learning_rate": 1.7111959287531808e-05, - "loss": 1.6602, + "epoch": 2.95, + "learning_rate": 3.5228571428571435e-05, + "loss": 1.4466, "step": 517 }, { - "epoch": 1.98, - "learning_rate": 1.7048346055979645e-05, - "loss": 1.6471, + "epoch": 2.96, + "learning_rate": 3.52e-05, + "loss": 1.4569, "step": 518 }, { - "epoch": 1.98, - "learning_rate": 1.6984732824427482e-05, - "loss": 1.5913, + "epoch": 2.97, + "learning_rate": 3.517142857142857e-05, + "loss": 1.5635, "step": 519 }, { - "epoch": 1.98, - "learning_rate": 1.692111959287532e-05, - "loss": 1.5145, + "epoch": 2.97, + "learning_rate": 3.514285714285714e-05, + "loss": 1.4915, "step": 520 }, { - "epoch": 1.99, - "learning_rate": 1.6857506361323156e-05, - "loss": 1.5259, + "epoch": 2.98, + "learning_rate": 3.5114285714285716e-05, + "loss": 1.5432, "step": 521 }, { - "epoch": 1.99, - "learning_rate": 1.6793893129770993e-05, - "loss": 1.7474, + "epoch": 2.98, + "learning_rate": 3.508571428571429e-05, + "loss": 1.5036, "step": 522 }, { - "epoch": 2.0, - "learning_rate": 1.673027989821883e-05, - "loss": 1.5921, + "epoch": 2.99, + "learning_rate": 3.505714285714286e-05, + "loss": 1.4524, "step": 523 }, { - "epoch": 2.0, - "learning_rate": 1.6666666666666667e-05, - "loss": 1.6021, + "epoch": 2.99, + "learning_rate": 3.502857142857143e-05, + "loss": 1.4675, "step": 524 }, { - "epoch": 2.0, - "learning_rate": 1.6603053435114505e-05, - "loss": 1.5225, + "epoch": 3.0, + "learning_rate": 3.5e-05, + "loss": 1.3013, "step": 525 }, { - "epoch": 2.01, - "learning_rate": 1.653944020356234e-05, - "loss": 1.5166, + "epoch": 3.01, + "learning_rate": 3.497142857142857e-05, + "loss": 1.4759, "step": 526 }, { - "epoch": 2.01, - "learning_rate": 1.647582697201018e-05, - "loss": 1.5947, + "epoch": 3.01, + "learning_rate": 3.4942857142857144e-05, + "loss": 1.4916, "step": 527 }, { - "epoch": 2.02, - "learning_rate": 1.6412213740458016e-05, - "loss": 1.4637, + "epoch": 3.02, + "learning_rate": 3.491428571428572e-05, + "loss": 1.3988, "step": 528 }, { - "epoch": 2.02, - "learning_rate": 1.6348600508905853e-05, - "loss": 1.6541, + "epoch": 3.02, + "learning_rate": 3.488571428571429e-05, + "loss": 1.4334, "step": 529 }, { - "epoch": 2.02, - "learning_rate": 1.628498727735369e-05, - "loss": 1.5789, + "epoch": 3.03, + "learning_rate": 3.485714285714286e-05, + "loss": 1.439, "step": 530 }, { - "epoch": 2.03, - "learning_rate": 1.6221374045801527e-05, - "loss": 1.5832, + "epoch": 3.03, + "learning_rate": 3.482857142857143e-05, + "loss": 1.4937, "step": 531 }, { - "epoch": 2.03, - "learning_rate": 1.6157760814249364e-05, - "loss": 1.5627, + "epoch": 3.04, + "learning_rate": 3.48e-05, + "loss": 1.3822, "step": 532 }, { - "epoch": 2.03, - "learning_rate": 1.60941475826972e-05, - "loss": 1.4415, + "epoch": 3.05, + "learning_rate": 3.477142857142857e-05, + "loss": 1.4023, "step": 533 }, { - "epoch": 2.04, - "learning_rate": 1.6030534351145038e-05, - "loss": 1.5264, + "epoch": 3.05, + "learning_rate": 3.4742857142857146e-05, + "loss": 1.3926, "step": 534 }, { - "epoch": 2.04, - "learning_rate": 1.5966921119592875e-05, - "loss": 1.522, + "epoch": 3.06, + "learning_rate": 3.471428571428571e-05, + "loss": 1.3613, "step": 535 }, { - "epoch": 2.05, - "learning_rate": 1.5903307888040712e-05, - "loss": 1.4428, + "epoch": 3.06, + "learning_rate": 3.468571428571429e-05, + "loss": 1.3861, "step": 536 }, { - "epoch": 2.05, - "learning_rate": 1.583969465648855e-05, - "loss": 1.4254, + "epoch": 3.07, + "learning_rate": 3.465714285714286e-05, + "loss": 1.3687, "step": 537 }, { - "epoch": 2.05, - "learning_rate": 1.5776081424936386e-05, - "loss": 1.4867, + "epoch": 3.07, + "learning_rate": 3.4628571428571434e-05, + "loss": 1.4331, "step": 538 }, { - "epoch": 2.06, - "learning_rate": 1.5712468193384224e-05, - "loss": 1.5695, + "epoch": 3.08, + "learning_rate": 3.46e-05, + "loss": 1.3683, "step": 539 }, { - "epoch": 2.06, - "learning_rate": 1.5648854961832064e-05, - "loss": 1.4549, + "epoch": 3.09, + "learning_rate": 3.4571428571428574e-05, + "loss": 1.4502, "step": 540 }, { - "epoch": 2.06, - "learning_rate": 1.5585241730279898e-05, - "loss": 1.5384, + "epoch": 3.09, + "learning_rate": 3.454285714285714e-05, + "loss": 1.3738, "step": 541 }, { - "epoch": 2.07, - "learning_rate": 1.5521628498727735e-05, - "loss": 1.5204, + "epoch": 3.1, + "learning_rate": 3.4514285714285715e-05, + "loss": 1.4079, "step": 542 }, { - "epoch": 2.07, - "learning_rate": 1.5458015267175575e-05, - "loss": 1.4854, + "epoch": 3.1, + "learning_rate": 3.448571428571429e-05, + "loss": 1.399, "step": 543 }, { - "epoch": 2.08, - "learning_rate": 1.539440203562341e-05, - "loss": 1.4278, + "epoch": 3.11, + "learning_rate": 3.445714285714286e-05, + "loss": 1.4222, "step": 544 }, { - "epoch": 2.08, - "learning_rate": 1.5330788804071246e-05, - "loss": 1.4553, + "epoch": 3.11, + "learning_rate": 3.442857142857143e-05, + "loss": 1.3754, "step": 545 }, { - "epoch": 2.08, - "learning_rate": 1.5267175572519086e-05, - "loss": 1.563, + "epoch": 3.12, + "learning_rate": 3.4399999999999996e-05, + "loss": 1.4712, "step": 546 }, { - "epoch": 2.09, - "learning_rate": 1.5203562340966923e-05, - "loss": 1.6154, + "epoch": 3.13, + "learning_rate": 3.437142857142857e-05, + "loss": 1.4293, "step": 547 }, { - "epoch": 2.09, - "learning_rate": 1.5139949109414759e-05, - "loss": 1.4265, + "epoch": 3.13, + "learning_rate": 3.434285714285714e-05, + "loss": 1.4502, "step": 548 }, { - "epoch": 2.1, - "learning_rate": 1.5076335877862596e-05, - "loss": 1.4601, + "epoch": 3.14, + "learning_rate": 3.431428571428572e-05, + "loss": 1.4555, "step": 549 }, { - "epoch": 2.1, - "learning_rate": 1.5012722646310435e-05, - "loss": 1.5103, + "epoch": 3.14, + "learning_rate": 3.428571428571429e-05, + "loss": 1.442, "step": 550 }, { - "epoch": 2.1, - "learning_rate": 1.494910941475827e-05, - "loss": 1.4919, + "epoch": 3.15, + "learning_rate": 3.425714285714286e-05, + "loss": 1.4041, "step": 551 }, { - "epoch": 2.11, - "learning_rate": 1.4885496183206107e-05, - "loss": 1.5718, + "epoch": 3.15, + "learning_rate": 3.422857142857143e-05, + "loss": 1.4795, "step": 552 }, { - "epoch": 2.11, - "learning_rate": 1.4821882951653946e-05, - "loss": 1.4859, + "epoch": 3.16, + "learning_rate": 3.4200000000000005e-05, + "loss": 1.4393, "step": 553 }, { - "epoch": 2.11, - "learning_rate": 1.4758269720101781e-05, - "loss": 1.4904, + "epoch": 3.17, + "learning_rate": 3.417142857142857e-05, + "loss": 1.5046, "step": 554 }, { - "epoch": 2.12, - "learning_rate": 1.4694656488549618e-05, - "loss": 1.5625, + "epoch": 3.17, + "learning_rate": 3.4142857142857145e-05, + "loss": 1.3706, "step": 555 }, { - "epoch": 2.12, - "learning_rate": 1.4631043256997457e-05, - "loss": 1.5098, + "epoch": 3.18, + "learning_rate": 3.411428571428571e-05, + "loss": 1.4684, "step": 556 }, { - "epoch": 2.13, - "learning_rate": 1.4567430025445294e-05, - "loss": 1.6042, + "epoch": 3.18, + "learning_rate": 3.4085714285714286e-05, + "loss": 1.3998, "step": 557 }, { - "epoch": 2.13, - "learning_rate": 1.450381679389313e-05, - "loss": 1.553, + "epoch": 3.19, + "learning_rate": 3.405714285714286e-05, + "loss": 1.4217, "step": 558 }, { - "epoch": 2.13, - "learning_rate": 1.4440203562340968e-05, - "loss": 1.6064, + "epoch": 3.19, + "learning_rate": 3.402857142857143e-05, + "loss": 1.4942, "step": 559 }, { - "epoch": 2.14, - "learning_rate": 1.4376590330788805e-05, - "loss": 1.7289, + "epoch": 3.2, + "learning_rate": 3.4000000000000007e-05, + "loss": 1.3545, "step": 560 }, { - "epoch": 2.14, - "learning_rate": 1.431297709923664e-05, - "loss": 1.5271, + "epoch": 3.21, + "learning_rate": 3.397142857142857e-05, + "loss": 1.4337, "step": 561 }, { - "epoch": 2.15, - "learning_rate": 1.424936386768448e-05, - "loss": 1.5183, + "epoch": 3.21, + "learning_rate": 3.394285714285714e-05, + "loss": 1.375, "step": 562 }, { - "epoch": 2.15, - "learning_rate": 1.4185750636132317e-05, - "loss": 1.6272, + "epoch": 3.22, + "learning_rate": 3.3914285714285714e-05, + "loss": 1.3336, "step": 563 }, { - "epoch": 2.15, - "learning_rate": 1.4122137404580155e-05, - "loss": 1.4932, + "epoch": 3.22, + "learning_rate": 3.388571428571429e-05, + "loss": 1.5026, "step": 564 }, { - "epoch": 2.16, - "learning_rate": 1.4058524173027989e-05, - "loss": 1.474, + "epoch": 3.23, + "learning_rate": 3.385714285714286e-05, + "loss": 1.4207, "step": 565 }, { - "epoch": 2.16, - "learning_rate": 1.3994910941475828e-05, - "loss": 1.5057, + "epoch": 3.23, + "learning_rate": 3.3828571428571435e-05, + "loss": 1.3992, "step": 566 }, { - "epoch": 2.16, - "learning_rate": 1.3931297709923667e-05, - "loss": 1.465, + "epoch": 3.24, + "learning_rate": 3.38e-05, + "loss": 1.4199, "step": 567 }, { - "epoch": 2.17, - "learning_rate": 1.38676844783715e-05, - "loss": 1.5127, + "epoch": 3.25, + "learning_rate": 3.377142857142857e-05, + "loss": 1.4919, "step": 568 }, { - "epoch": 2.17, - "learning_rate": 1.3804071246819339e-05, - "loss": 1.521, + "epoch": 3.25, + "learning_rate": 3.374285714285714e-05, + "loss": 1.4955, "step": 569 }, { - "epoch": 2.18, - "learning_rate": 1.3740458015267178e-05, - "loss": 1.5181, + "epoch": 3.26, + "learning_rate": 3.3714285714285716e-05, + "loss": 1.3187, "step": 570 }, { - "epoch": 2.18, - "learning_rate": 1.3676844783715011e-05, - "loss": 1.5222, + "epoch": 3.26, + "learning_rate": 3.368571428571429e-05, + "loss": 1.4405, "step": 571 }, { - "epoch": 2.18, - "learning_rate": 1.361323155216285e-05, - "loss": 1.5319, + "epoch": 3.27, + "learning_rate": 3.3657142857142856e-05, + "loss": 1.4199, "step": 572 }, { - "epoch": 2.19, - "learning_rate": 1.3549618320610687e-05, - "loss": 1.3958, + "epoch": 3.27, + "learning_rate": 3.362857142857143e-05, + "loss": 1.4144, "step": 573 }, { - "epoch": 2.19, - "learning_rate": 1.3486005089058526e-05, - "loss": 1.4805, + "epoch": 3.28, + "learning_rate": 3.3600000000000004e-05, + "loss": 1.3605, "step": 574 }, { - "epoch": 2.19, - "learning_rate": 1.3422391857506361e-05, - "loss": 1.4634, + "epoch": 3.29, + "learning_rate": 3.357142857142857e-05, + "loss": 1.4023, "step": 575 }, { - "epoch": 2.2, - "learning_rate": 1.3358778625954198e-05, - "loss": 1.6046, + "epoch": 3.29, + "learning_rate": 3.3542857142857144e-05, + "loss": 1.4218, "step": 576 }, { - "epoch": 2.2, - "learning_rate": 1.3295165394402037e-05, - "loss": 1.5216, + "epoch": 3.3, + "learning_rate": 3.351428571428572e-05, + "loss": 1.3927, "step": 577 }, { - "epoch": 2.21, - "learning_rate": 1.3231552162849873e-05, - "loss": 1.5602, + "epoch": 3.3, + "learning_rate": 3.3485714285714285e-05, + "loss": 1.4504, "step": 578 }, { - "epoch": 2.21, - "learning_rate": 1.316793893129771e-05, - "loss": 1.6176, + "epoch": 3.31, + "learning_rate": 3.345714285714286e-05, + "loss": 1.4313, "step": 579 }, { - "epoch": 2.21, - "learning_rate": 1.3104325699745548e-05, - "loss": 1.5447, + "epoch": 3.31, + "learning_rate": 3.342857142857143e-05, + "loss": 1.3784, "step": 580 }, { - "epoch": 2.22, - "learning_rate": 1.3040712468193386e-05, - "loss": 1.4158, + "epoch": 3.32, + "learning_rate": 3.3400000000000005e-05, + "loss": 1.3691, "step": 581 }, { - "epoch": 2.22, - "learning_rate": 1.2977099236641221e-05, - "loss": 1.4954, + "epoch": 3.33, + "learning_rate": 3.337142857142857e-05, + "loss": 1.389, "step": 582 }, { - "epoch": 2.23, - "learning_rate": 1.291348600508906e-05, - "loss": 1.473, + "epoch": 3.33, + "learning_rate": 3.334285714285714e-05, + "loss": 1.4565, "step": 583 }, { - "epoch": 2.23, - "learning_rate": 1.2849872773536897e-05, - "loss": 1.6029, + "epoch": 3.34, + "learning_rate": 3.331428571428571e-05, + "loss": 1.3677, "step": 584 }, { - "epoch": 2.23, - "learning_rate": 1.2786259541984732e-05, - "loss": 1.4873, + "epoch": 3.34, + "learning_rate": 3.3285714285714286e-05, + "loss": 1.3421, "step": 585 }, { - "epoch": 2.24, - "learning_rate": 1.2722646310432571e-05, - "loss": 1.5771, + "epoch": 3.35, + "learning_rate": 3.325714285714286e-05, + "loss": 1.2984, "step": 586 }, { - "epoch": 2.24, - "learning_rate": 1.2659033078880408e-05, - "loss": 1.5967, + "epoch": 3.35, + "learning_rate": 3.3228571428571434e-05, + "loss": 1.3738, "step": 587 }, { - "epoch": 2.24, - "learning_rate": 1.2595419847328243e-05, - "loss": 1.4916, + "epoch": 3.36, + "learning_rate": 3.32e-05, + "loss": 1.4382, "step": 588 }, { - "epoch": 2.25, - "learning_rate": 1.2531806615776082e-05, - "loss": 1.4802, + "epoch": 3.37, + "learning_rate": 3.3171428571428574e-05, + "loss": 1.4113, "step": 589 }, { - "epoch": 2.25, - "learning_rate": 1.2468193384223919e-05, - "loss": 1.4806, + "epoch": 3.37, + "learning_rate": 3.314285714285714e-05, + "loss": 1.3727, "step": 590 }, { - "epoch": 2.26, - "learning_rate": 1.2404580152671756e-05, - "loss": 1.517, + "epoch": 3.38, + "learning_rate": 3.3114285714285715e-05, + "loss": 1.4042, "step": 591 }, { - "epoch": 2.26, - "learning_rate": 1.2340966921119595e-05, - "loss": 1.5302, + "epoch": 3.38, + "learning_rate": 3.308571428571429e-05, + "loss": 1.4052, "step": 592 }, { - "epoch": 2.26, - "learning_rate": 1.227735368956743e-05, - "loss": 1.4838, + "epoch": 3.39, + "learning_rate": 3.305714285714286e-05, + "loss": 1.3701, "step": 593 }, { - "epoch": 2.27, - "learning_rate": 1.2213740458015267e-05, - "loss": 1.54, + "epoch": 3.39, + "learning_rate": 3.302857142857143e-05, + "loss": 1.4521, "step": 594 }, { - "epoch": 2.27, - "learning_rate": 1.2150127226463104e-05, - "loss": 1.4704, + "epoch": 3.4, + "learning_rate": 3.3e-05, + "loss": 1.3066, "step": 595 }, { - "epoch": 2.27, - "learning_rate": 1.2086513994910942e-05, - "loss": 1.4974, + "epoch": 3.41, + "learning_rate": 3.2971428571428576e-05, + "loss": 1.4493, "step": 596 }, { - "epoch": 2.28, - "learning_rate": 1.202290076335878e-05, - "loss": 1.4962, + "epoch": 3.41, + "learning_rate": 3.294285714285714e-05, + "loss": 1.3907, "step": 597 }, { - "epoch": 2.28, - "learning_rate": 1.1959287531806616e-05, - "loss": 1.6077, + "epoch": 3.42, + "learning_rate": 3.291428571428572e-05, + "loss": 1.3197, "step": 598 }, { - "epoch": 2.29, - "learning_rate": 1.1895674300254453e-05, - "loss": 1.4532, + "epoch": 3.42, + "learning_rate": 3.2885714285714284e-05, + "loss": 1.4457, "step": 599 }, { - "epoch": 2.29, - "learning_rate": 1.1832061068702292e-05, - "loss": 1.4745, + "epoch": 3.43, + "learning_rate": 3.285714285714286e-05, + "loss": 1.4287, "step": 600 }, { - "epoch": 2.29, - "learning_rate": 1.1768447837150127e-05, - "loss": 1.6156, + "epoch": 3.43, + "learning_rate": 3.282857142857143e-05, + "loss": 1.3399, "step": 601 }, { - "epoch": 2.3, - "learning_rate": 1.1704834605597966e-05, - "loss": 1.5023, + "epoch": 3.44, + "learning_rate": 3.2800000000000004e-05, + "loss": 1.4595, "step": 602 }, { - "epoch": 2.3, - "learning_rate": 1.1641221374045803e-05, - "loss": 1.5043, + "epoch": 3.45, + "learning_rate": 3.277142857142858e-05, + "loss": 1.3931, "step": 603 }, { - "epoch": 2.31, - "learning_rate": 1.1577608142493638e-05, - "loss": 1.4825, + "epoch": 3.45, + "learning_rate": 3.2742857142857145e-05, + "loss": 1.413, "step": 604 }, { - "epoch": 2.31, - "learning_rate": 1.1513994910941477e-05, - "loss": 1.5165, + "epoch": 3.46, + "learning_rate": 3.271428571428571e-05, + "loss": 1.4462, "step": 605 }, { - "epoch": 2.31, - "learning_rate": 1.1450381679389314e-05, - "loss": 1.5332, + "epoch": 3.46, + "learning_rate": 3.2685714285714285e-05, + "loss": 1.3365, "step": 606 }, { - "epoch": 2.32, - "learning_rate": 1.1386768447837151e-05, - "loss": 1.4058, + "epoch": 3.47, + "learning_rate": 3.265714285714286e-05, + "loss": 1.3412, "step": 607 }, { - "epoch": 2.32, - "learning_rate": 1.1323155216284988e-05, - "loss": 1.5201, + "epoch": 3.47, + "learning_rate": 3.262857142857143e-05, + "loss": 1.4854, "step": 608 }, { - "epoch": 2.32, - "learning_rate": 1.1259541984732823e-05, - "loss": 1.4773, + "epoch": 3.48, + "learning_rate": 3.26e-05, + "loss": 1.3158, "step": 609 }, { - "epoch": 2.33, - "learning_rate": 1.1195928753180662e-05, - "loss": 1.609, + "epoch": 3.49, + "learning_rate": 3.257142857142857e-05, + "loss": 1.4656, "step": 610 }, { - "epoch": 2.33, - "learning_rate": 1.11323155216285e-05, - "loss": 1.4419, + "epoch": 3.49, + "learning_rate": 3.254285714285715e-05, + "loss": 1.4103, "step": 611 }, { - "epoch": 2.34, - "learning_rate": 1.1068702290076336e-05, - "loss": 1.5836, + "epoch": 3.5, + "learning_rate": 3.2514285714285714e-05, + "loss": 1.4259, "step": 612 }, { - "epoch": 2.34, - "learning_rate": 1.1005089058524173e-05, - "loss": 1.6614, + "epoch": 3.5, + "learning_rate": 3.248571428571429e-05, + "loss": 1.4604, "step": 613 }, { - "epoch": 2.34, - "learning_rate": 1.0941475826972012e-05, - "loss": 1.6177, + "epoch": 3.51, + "learning_rate": 3.245714285714286e-05, + "loss": 1.3915, "step": 614 }, { - "epoch": 2.35, - "learning_rate": 1.0877862595419848e-05, - "loss": 1.4421, + "epoch": 3.51, + "learning_rate": 3.242857142857143e-05, + "loss": 1.3336, "step": 615 }, { - "epoch": 2.35, - "learning_rate": 1.0814249363867685e-05, - "loss": 1.542, + "epoch": 3.52, + "learning_rate": 3.24e-05, + "loss": 1.4415, "step": 616 }, { - "epoch": 2.35, - "learning_rate": 1.0750636132315522e-05, - "loss": 1.492, + "epoch": 3.53, + "learning_rate": 3.2371428571428575e-05, + "loss": 1.426, "step": 617 }, { - "epoch": 2.36, - "learning_rate": 1.0687022900763359e-05, - "loss": 1.5336, + "epoch": 3.53, + "learning_rate": 3.234285714285715e-05, + "loss": 1.4257, "step": 618 }, { - "epoch": 2.36, - "learning_rate": 1.0623409669211198e-05, - "loss": 1.5533, + "epoch": 3.54, + "learning_rate": 3.2314285714285716e-05, + "loss": 1.392, "step": 619 }, { - "epoch": 2.37, - "learning_rate": 1.0559796437659033e-05, - "loss": 1.6151, + "epoch": 3.54, + "learning_rate": 3.228571428571428e-05, + "loss": 1.3406, "step": 620 }, { - "epoch": 2.37, - "learning_rate": 1.049618320610687e-05, - "loss": 1.5709, + "epoch": 3.55, + "learning_rate": 3.2257142857142856e-05, + "loss": 1.4017, "step": 621 }, { - "epoch": 2.37, - "learning_rate": 1.0432569974554709e-05, - "loss": 1.5086, + "epoch": 3.55, + "learning_rate": 3.222857142857143e-05, + "loss": 1.4736, "step": 622 }, { - "epoch": 2.38, - "learning_rate": 1.0368956743002544e-05, - "loss": 1.501, + "epoch": 3.56, + "learning_rate": 3.2200000000000003e-05, + "loss": 1.3957, "step": 623 }, { - "epoch": 2.38, - "learning_rate": 1.0305343511450383e-05, - "loss": 1.5721, + "epoch": 3.57, + "learning_rate": 3.217142857142858e-05, + "loss": 1.4527, "step": 624 }, { - "epoch": 2.39, - "learning_rate": 1.024173027989822e-05, - "loss": 1.6042, + "epoch": 3.57, + "learning_rate": 3.2142857142857144e-05, + "loss": 1.4024, "step": 625 }, { - "epoch": 2.39, - "learning_rate": 1.0178117048346055e-05, - "loss": 1.4864, + "epoch": 3.58, + "learning_rate": 3.211428571428571e-05, + "loss": 1.3226, "step": 626 }, { - "epoch": 2.39, - "learning_rate": 1.0114503816793894e-05, - "loss": 1.5107, + "epoch": 3.58, + "learning_rate": 3.2085714285714284e-05, + "loss": 1.4371, "step": 627 }, { - "epoch": 2.4, - "learning_rate": 1.0050890585241731e-05, - "loss": 1.5386, + "epoch": 3.59, + "learning_rate": 3.205714285714286e-05, + "loss": 1.3084, "step": 628 }, { - "epoch": 2.4, - "learning_rate": 9.987277353689568e-06, - "loss": 1.5453, + "epoch": 3.59, + "learning_rate": 3.202857142857143e-05, + "loss": 1.4082, "step": 629 }, { - "epoch": 2.4, - "learning_rate": 9.923664122137405e-06, - "loss": 1.51, + "epoch": 3.6, + "learning_rate": 3.2000000000000005e-05, + "loss": 1.3619, "step": 630 }, { - "epoch": 2.41, - "learning_rate": 9.860050890585242e-06, - "loss": 1.5821, + "epoch": 3.61, + "learning_rate": 3.197142857142857e-05, + "loss": 1.3552, "step": 631 }, { - "epoch": 2.41, - "learning_rate": 9.79643765903308e-06, - "loss": 1.5815, + "epoch": 3.61, + "learning_rate": 3.1942857142857146e-05, + "loss": 1.3593, "step": 632 }, { - "epoch": 2.42, - "learning_rate": 9.732824427480917e-06, - "loss": 1.5071, + "epoch": 3.62, + "learning_rate": 3.191428571428571e-05, + "loss": 1.3934, "step": 633 }, { - "epoch": 2.42, - "learning_rate": 9.669211195928754e-06, - "loss": 1.5823, + "epoch": 3.62, + "learning_rate": 3.1885714285714286e-05, + "loss": 1.4082, "step": 634 }, { - "epoch": 2.42, - "learning_rate": 9.60559796437659e-06, - "loss": 1.4497, + "epoch": 3.63, + "learning_rate": 3.185714285714286e-05, + "loss": 1.2599, "step": 635 }, { - "epoch": 2.43, - "learning_rate": 9.541984732824428e-06, - "loss": 1.5951, + "epoch": 3.63, + "learning_rate": 3.182857142857143e-05, + "loss": 1.5062, "step": 636 }, { - "epoch": 2.43, - "learning_rate": 9.478371501272265e-06, - "loss": 1.5335, + "epoch": 3.64, + "learning_rate": 3.18e-05, + "loss": 1.4426, "step": 637 }, { - "epoch": 2.44, - "learning_rate": 9.414758269720102e-06, - "loss": 1.5423, + "epoch": 3.65, + "learning_rate": 3.1771428571428574e-05, + "loss": 1.41, "step": 638 }, { - "epoch": 2.44, - "learning_rate": 9.351145038167939e-06, - "loss": 1.5326, + "epoch": 3.65, + "learning_rate": 3.174285714285715e-05, + "loss": 1.4143, "step": 639 }, { - "epoch": 2.44, - "learning_rate": 9.287531806615776e-06, - "loss": 1.3375, + "epoch": 3.66, + "learning_rate": 3.1714285714285715e-05, + "loss": 1.3948, "step": 640 }, { - "epoch": 2.45, - "learning_rate": 9.223918575063615e-06, - "loss": 1.323, + "epoch": 3.66, + "learning_rate": 3.168571428571429e-05, + "loss": 1.3897, "step": 641 }, { - "epoch": 2.45, - "learning_rate": 9.16030534351145e-06, - "loss": 1.4634, + "epoch": 3.67, + "learning_rate": 3.1657142857142855e-05, + "loss": 1.459, "step": 642 }, { - "epoch": 2.45, - "learning_rate": 9.096692111959287e-06, - "loss": 1.426, + "epoch": 3.67, + "learning_rate": 3.162857142857143e-05, + "loss": 1.4066, "step": 643 }, { - "epoch": 2.46, - "learning_rate": 9.033078880407126e-06, - "loss": 1.65, + "epoch": 3.68, + "learning_rate": 3.16e-05, + "loss": 1.4348, "step": 644 }, { - "epoch": 2.46, - "learning_rate": 8.969465648854961e-06, - "loss": 1.485, + "epoch": 3.69, + "learning_rate": 3.1571428571428576e-05, + "loss": 1.3857, "step": 645 }, { - "epoch": 2.47, - "learning_rate": 8.9058524173028e-06, - "loss": 1.6655, + "epoch": 3.69, + "learning_rate": 3.154285714285714e-05, + "loss": 1.3381, "step": 646 }, { - "epoch": 2.47, - "learning_rate": 8.842239185750637e-06, - "loss": 1.372, + "epoch": 3.7, + "learning_rate": 3.1514285714285717e-05, + "loss": 1.4283, "step": 647 }, { - "epoch": 2.47, - "learning_rate": 8.778625954198473e-06, - "loss": 1.5437, + "epoch": 3.7, + "learning_rate": 3.148571428571428e-05, + "loss": 1.4544, "step": 648 }, { - "epoch": 2.48, - "learning_rate": 8.715012722646311e-06, - "loss": 1.5228, + "epoch": 3.71, + "learning_rate": 3.145714285714286e-05, + "loss": 1.434, "step": 649 }, { - "epoch": 2.48, - "learning_rate": 8.651399491094148e-06, - "loss": 1.3835, + "epoch": 3.71, + "learning_rate": 3.142857142857143e-05, + "loss": 1.4212, "step": 650 }, { - "epoch": 2.48, - "learning_rate": 8.587786259541985e-06, - "loss": 1.4312, + "epoch": 3.72, + "learning_rate": 3.1400000000000004e-05, + "loss": 1.3627, "step": 651 }, { - "epoch": 2.49, - "learning_rate": 8.524173027989823e-06, - "loss": 1.5355, + "epoch": 3.73, + "learning_rate": 3.137142857142857e-05, + "loss": 1.4421, "step": 652 }, { - "epoch": 2.49, - "learning_rate": 8.46055979643766e-06, - "loss": 1.3893, + "epoch": 3.73, + "learning_rate": 3.1342857142857145e-05, + "loss": 1.4152, "step": 653 }, { - "epoch": 2.5, - "learning_rate": 8.396946564885497e-06, - "loss": 1.3776, + "epoch": 3.74, + "learning_rate": 3.131428571428572e-05, + "loss": 1.2664, "step": 654 }, { - "epoch": 2.5, - "learning_rate": 8.333333333333334e-06, - "loss": 1.4967, + "epoch": 3.74, + "learning_rate": 3.1285714285714285e-05, + "loss": 1.44, "step": 655 }, { - "epoch": 2.5, - "learning_rate": 8.26972010178117e-06, - "loss": 1.3751, + "epoch": 3.75, + "learning_rate": 3.125714285714286e-05, + "loss": 1.3987, "step": 656 }, { - "epoch": 2.51, - "learning_rate": 8.206106870229008e-06, - "loss": 1.5909, + "epoch": 3.75, + "learning_rate": 3.122857142857143e-05, + "loss": 1.4801, "step": 657 }, { - "epoch": 2.51, - "learning_rate": 8.142493638676845e-06, - "loss": 1.6396, + "epoch": 3.76, + "learning_rate": 3.12e-05, + "loss": 1.292, "step": 658 }, { - "epoch": 2.52, - "learning_rate": 8.078880407124682e-06, - "loss": 1.5689, + "epoch": 3.77, + "learning_rate": 3.117142857142857e-05, + "loss": 1.3902, "step": 659 }, { - "epoch": 2.52, - "learning_rate": 8.015267175572519e-06, - "loss": 1.4298, + "epoch": 3.77, + "learning_rate": 3.114285714285715e-05, + "loss": 1.4487, "step": 660 }, { - "epoch": 2.52, - "learning_rate": 7.951653944020356e-06, - "loss": 1.4689, + "epoch": 3.78, + "learning_rate": 3.111428571428572e-05, + "loss": 1.3712, "step": 661 }, { - "epoch": 2.53, - "learning_rate": 7.888040712468193e-06, - "loss": 1.6027, + "epoch": 3.78, + "learning_rate": 3.108571428571429e-05, + "loss": 1.3813, "step": 662 }, { - "epoch": 2.53, - "learning_rate": 7.824427480916032e-06, - "loss": 1.4128, + "epoch": 3.79, + "learning_rate": 3.1057142857142854e-05, + "loss": 1.3506, "step": 663 }, { - "epoch": 2.53, - "learning_rate": 7.760814249363867e-06, - "loss": 1.5457, + "epoch": 3.79, + "learning_rate": 3.102857142857143e-05, + "loss": 1.3984, "step": 664 }, { - "epoch": 2.54, - "learning_rate": 7.697201017811704e-06, - "loss": 1.5069, + "epoch": 3.8, + "learning_rate": 3.1e-05, + "loss": 1.465, "step": 665 }, { - "epoch": 2.54, - "learning_rate": 7.633587786259543e-06, - "loss": 1.5511, + "epoch": 3.81, + "learning_rate": 3.0971428571428575e-05, + "loss": 1.3658, "step": 666 }, { - "epoch": 2.55, - "learning_rate": 7.569974554707379e-06, - "loss": 1.4441, + "epoch": 3.81, + "learning_rate": 3.094285714285715e-05, + "loss": 1.372, "step": 667 }, { - "epoch": 2.55, - "learning_rate": 7.506361323155217e-06, - "loss": 1.4732, + "epoch": 3.82, + "learning_rate": 3.0914285714285715e-05, + "loss": 1.3928, "step": 668 }, { - "epoch": 2.55, - "learning_rate": 7.4427480916030536e-06, - "loss": 1.479, + "epoch": 3.82, + "learning_rate": 3.088571428571428e-05, + "loss": 1.406, "step": 669 }, { - "epoch": 2.56, - "learning_rate": 7.379134860050891e-06, - "loss": 1.4266, + "epoch": 3.83, + "learning_rate": 3.0857142857142856e-05, + "loss": 1.4114, "step": 670 }, { - "epoch": 2.56, - "learning_rate": 7.3155216284987285e-06, - "loss": 1.5745, + "epoch": 3.83, + "learning_rate": 3.082857142857143e-05, + "loss": 1.4176, "step": 671 }, { - "epoch": 2.56, - "learning_rate": 7.251908396946565e-06, - "loss": 1.3644, + "epoch": 3.84, + "learning_rate": 3.08e-05, + "loss": 1.454, "step": 672 }, { - "epoch": 2.57, - "learning_rate": 7.188295165394403e-06, - "loss": 1.4786, + "epoch": 3.85, + "learning_rate": 3.077142857142857e-05, + "loss": 1.3545, "step": 673 }, { - "epoch": 2.57, - "learning_rate": 7.12468193384224e-06, - "loss": 1.5273, + "epoch": 3.85, + "learning_rate": 3.0742857142857144e-05, + "loss": 1.3091, "step": 674 }, { - "epoch": 2.58, - "learning_rate": 7.061068702290078e-06, - "loss": 1.4906, + "epoch": 3.86, + "learning_rate": 3.071428571428572e-05, + "loss": 1.3078, "step": 675 }, { - "epoch": 2.58, - "learning_rate": 6.997455470737914e-06, - "loss": 1.4297, + "epoch": 3.86, + "learning_rate": 3.068571428571429e-05, + "loss": 1.3847, "step": 676 }, { - "epoch": 2.58, - "learning_rate": 6.93384223918575e-06, - "loss": 1.5863, + "epoch": 3.87, + "learning_rate": 3.065714285714286e-05, + "loss": 1.4205, "step": 677 }, { - "epoch": 2.59, - "learning_rate": 6.870229007633589e-06, - "loss": 1.5445, + "epoch": 3.87, + "learning_rate": 3.062857142857143e-05, + "loss": 1.4477, "step": 678 }, { - "epoch": 2.59, - "learning_rate": 6.806615776081425e-06, - "loss": 1.4213, + "epoch": 3.88, + "learning_rate": 3.06e-05, + "loss": 1.4563, "step": 679 }, { - "epoch": 2.6, - "learning_rate": 6.743002544529263e-06, - "loss": 1.4672, + "epoch": 3.89, + "learning_rate": 3.057142857142857e-05, + "loss": 1.3703, "step": 680 }, { - "epoch": 2.6, - "learning_rate": 6.679389312977099e-06, - "loss": 1.5799, + "epoch": 3.89, + "learning_rate": 3.0542857142857146e-05, + "loss": 1.3799, "step": 681 }, { - "epoch": 2.6, - "learning_rate": 6.615776081424936e-06, - "loss": 1.5335, + "epoch": 3.9, + "learning_rate": 3.0514285714285716e-05, + "loss": 1.3707, "step": 682 }, { - "epoch": 2.61, - "learning_rate": 6.552162849872774e-06, - "loss": 1.4691, + "epoch": 3.9, + "learning_rate": 3.048571428571429e-05, + "loss": 1.4205, "step": 683 }, { - "epoch": 2.61, - "learning_rate": 6.4885496183206104e-06, - "loss": 1.504, + "epoch": 3.91, + "learning_rate": 3.0457142857142856e-05, + "loss": 1.4544, "step": 684 }, { - "epoch": 2.61, - "learning_rate": 6.424936386768448e-06, - "loss": 1.4863, + "epoch": 3.91, + "learning_rate": 3.042857142857143e-05, + "loss": 1.3434, "step": 685 }, { - "epoch": 2.62, - "learning_rate": 6.3613231552162854e-06, - "loss": 1.4737, + "epoch": 3.92, + "learning_rate": 3.04e-05, + "loss": 1.3884, "step": 686 }, { - "epoch": 2.62, - "learning_rate": 6.297709923664122e-06, - "loss": 1.6219, + "epoch": 3.93, + "learning_rate": 3.0371428571428574e-05, + "loss": 1.3845, "step": 687 }, { - "epoch": 2.63, - "learning_rate": 6.2340966921119596e-06, - "loss": 1.5618, + "epoch": 3.93, + "learning_rate": 3.0342857142857144e-05, + "loss": 1.4193, "step": 688 }, { - "epoch": 2.63, - "learning_rate": 6.1704834605597975e-06, - "loss": 1.523, + "epoch": 3.94, + "learning_rate": 3.0314285714285718e-05, + "loss": 1.3968, "step": 689 }, { - "epoch": 2.63, - "learning_rate": 6.106870229007634e-06, - "loss": 1.4841, + "epoch": 3.94, + "learning_rate": 3.0285714285714288e-05, + "loss": 1.3406, "step": 690 }, { - "epoch": 2.64, - "learning_rate": 6.043256997455471e-06, - "loss": 1.5883, + "epoch": 3.95, + "learning_rate": 3.0257142857142855e-05, + "loss": 1.4414, "step": 691 }, { - "epoch": 2.64, - "learning_rate": 5.979643765903308e-06, - "loss": 1.5221, + "epoch": 3.95, + "learning_rate": 3.022857142857143e-05, + "loss": 1.3637, "step": 692 }, { - "epoch": 2.65, - "learning_rate": 5.916030534351146e-06, - "loss": 1.4583, + "epoch": 3.96, + "learning_rate": 3.02e-05, + "loss": 1.4607, "step": 693 }, { - "epoch": 2.65, - "learning_rate": 5.852417302798983e-06, - "loss": 1.6034, + "epoch": 3.97, + "learning_rate": 3.0171428571428572e-05, + "loss": 1.4028, "step": 694 }, { - "epoch": 2.65, - "learning_rate": 5.788804071246819e-06, - "loss": 1.4732, + "epoch": 3.97, + "learning_rate": 3.0142857142857146e-05, + "loss": 1.3849, "step": 695 }, { - "epoch": 2.66, - "learning_rate": 5.725190839694657e-06, - "loss": 1.5974, + "epoch": 3.98, + "learning_rate": 3.0114285714285716e-05, + "loss": 1.3121, "step": 696 }, { - "epoch": 2.66, - "learning_rate": 5.661577608142494e-06, - "loss": 1.5268, + "epoch": 3.98, + "learning_rate": 3.008571428571429e-05, + "loss": 1.3836, "step": 697 }, { - "epoch": 2.66, - "learning_rate": 5.597964376590331e-06, - "loss": 1.4549, + "epoch": 3.99, + "learning_rate": 3.0057142857142857e-05, + "loss": 1.3917, "step": 698 }, { - "epoch": 2.67, - "learning_rate": 5.534351145038168e-06, - "loss": 1.5012, + "epoch": 3.99, + "learning_rate": 3.0028571428571427e-05, + "loss": 1.3586, "step": 699 }, { - "epoch": 2.67, - "learning_rate": 5.470737913486006e-06, - "loss": 1.4293, + "epoch": 4.0, + "learning_rate": 3e-05, + "loss": 1.3215, "step": 700 }, { - "epoch": 2.68, - "learning_rate": 5.407124681933842e-06, - "loss": 1.6263, + "epoch": 4.01, + "learning_rate": 2.997142857142857e-05, + "loss": 1.3277, "step": 701 }, { - "epoch": 2.68, - "learning_rate": 5.343511450381679e-06, - "loss": 1.542, + "epoch": 4.01, + "learning_rate": 2.9942857142857145e-05, + "loss": 1.3669, "step": 702 }, { - "epoch": 2.68, - "learning_rate": 5.2798982188295165e-06, - "loss": 1.5289, + "epoch": 4.02, + "learning_rate": 2.9914285714285718e-05, + "loss": 1.3304, "step": 703 }, { - "epoch": 2.69, - "learning_rate": 5.216284987277354e-06, - "loss": 1.4846, + "epoch": 4.02, + "learning_rate": 2.988571428571429e-05, + "loss": 1.3059, "step": 704 }, { - "epoch": 2.69, - "learning_rate": 5.1526717557251914e-06, - "loss": 1.5461, + "epoch": 4.03, + "learning_rate": 2.9857142857142862e-05, + "loss": 1.3963, "step": 705 }, { - "epoch": 2.69, - "learning_rate": 5.089058524173028e-06, - "loss": 1.516, + "epoch": 4.03, + "learning_rate": 2.982857142857143e-05, + "loss": 1.333, "step": 706 }, { - "epoch": 2.7, - "learning_rate": 5.025445292620866e-06, - "loss": 1.5432, + "epoch": 4.04, + "learning_rate": 2.98e-05, + "loss": 1.2779, "step": 707 }, { - "epoch": 2.7, - "learning_rate": 4.961832061068703e-06, - "loss": 1.4167, + "epoch": 4.05, + "learning_rate": 2.9771428571428573e-05, + "loss": 1.3823, "step": 708 }, { - "epoch": 2.71, - "learning_rate": 4.89821882951654e-06, - "loss": 1.441, + "epoch": 4.05, + "learning_rate": 2.9742857142857143e-05, + "loss": 1.3253, "step": 709 }, { - "epoch": 2.71, - "learning_rate": 4.834605597964377e-06, - "loss": 1.547, + "epoch": 4.06, + "learning_rate": 2.9714285714285717e-05, + "loss": 1.372, "step": 710 }, { - "epoch": 2.71, - "learning_rate": 4.770992366412214e-06, - "loss": 1.5322, + "epoch": 4.06, + "learning_rate": 2.968571428571429e-05, + "loss": 1.3313, "step": 711 }, { - "epoch": 2.72, - "learning_rate": 4.707379134860051e-06, - "loss": 1.4142, + "epoch": 4.07, + "learning_rate": 2.965714285714286e-05, + "loss": 1.3027, "step": 712 }, { - "epoch": 2.72, - "learning_rate": 4.643765903307888e-06, - "loss": 1.4593, + "epoch": 4.07, + "learning_rate": 2.9628571428571428e-05, + "loss": 1.3682, "step": 713 }, { - "epoch": 2.73, - "learning_rate": 4.580152671755725e-06, - "loss": 1.5256, + "epoch": 4.08, + "learning_rate": 2.96e-05, + "loss": 1.3231, "step": 714 }, { - "epoch": 2.73, - "learning_rate": 4.516539440203563e-06, - "loss": 1.5131, + "epoch": 4.09, + "learning_rate": 2.957142857142857e-05, + "loss": 1.3922, "step": 715 }, { - "epoch": 2.73, - "learning_rate": 4.4529262086514e-06, - "loss": 1.474, + "epoch": 4.09, + "learning_rate": 2.9542857142857145e-05, + "loss": 1.3762, "step": 716 }, { - "epoch": 2.74, - "learning_rate": 4.389312977099236e-06, - "loss": 1.4454, + "epoch": 4.1, + "learning_rate": 2.9514285714285715e-05, + "loss": 1.4071, "step": 717 }, { - "epoch": 2.74, - "learning_rate": 4.325699745547074e-06, - "loss": 1.6011, + "epoch": 4.1, + "learning_rate": 2.948571428571429e-05, + "loss": 1.3917, "step": 718 }, { - "epoch": 2.74, - "learning_rate": 4.262086513994911e-06, - "loss": 1.608, + "epoch": 4.11, + "learning_rate": 2.9457142857142863e-05, + "loss": 1.33, "step": 719 }, { - "epoch": 2.75, - "learning_rate": 4.198473282442748e-06, - "loss": 1.4159, + "epoch": 4.11, + "learning_rate": 2.9428571428571426e-05, + "loss": 1.3685, "step": 720 }, { - "epoch": 2.75, - "learning_rate": 4.134860050890585e-06, - "loss": 1.4697, + "epoch": 4.12, + "learning_rate": 2.94e-05, + "loss": 1.299, "step": 721 }, { - "epoch": 2.76, - "learning_rate": 4.0712468193384225e-06, - "loss": 1.4841, + "epoch": 4.13, + "learning_rate": 2.9371428571428573e-05, + "loss": 1.4026, "step": 722 }, { - "epoch": 2.76, - "learning_rate": 4.0076335877862595e-06, - "loss": 1.508, + "epoch": 4.13, + "learning_rate": 2.9342857142857144e-05, + "loss": 1.3235, "step": 723 }, { - "epoch": 2.76, - "learning_rate": 3.944020356234097e-06, - "loss": 1.4807, + "epoch": 4.14, + "learning_rate": 2.9314285714285717e-05, + "loss": 1.3566, "step": 724 }, { - "epoch": 2.77, - "learning_rate": 3.880407124681934e-06, - "loss": 1.4928, + "epoch": 4.14, + "learning_rate": 2.9285714285714288e-05, + "loss": 1.3508, "step": 725 }, { - "epoch": 2.77, - "learning_rate": 3.816793893129772e-06, - "loss": 1.5036, + "epoch": 4.15, + "learning_rate": 2.925714285714286e-05, + "loss": 1.3755, "step": 726 }, { - "epoch": 2.77, - "learning_rate": 3.7531806615776087e-06, - "loss": 1.4223, + "epoch": 4.15, + "learning_rate": 2.9228571428571428e-05, + "loss": 1.3036, "step": 727 }, { - "epoch": 2.78, - "learning_rate": 3.6895674300254453e-06, - "loss": 1.6322, + "epoch": 4.16, + "learning_rate": 2.9199999999999998e-05, + "loss": 1.3768, "step": 728 }, { - "epoch": 2.78, - "learning_rate": 3.6259541984732824e-06, - "loss": 1.4899, + "epoch": 4.17, + "learning_rate": 2.9171428571428572e-05, + "loss": 1.4108, "step": 729 }, { - "epoch": 2.79, - "learning_rate": 3.56234096692112e-06, - "loss": 1.5485, + "epoch": 4.17, + "learning_rate": 2.9142857142857146e-05, + "loss": 1.2796, "step": 730 }, { - "epoch": 2.79, - "learning_rate": 3.498727735368957e-06, - "loss": 1.4896, + "epoch": 4.18, + "learning_rate": 2.9114285714285716e-05, + "loss": 1.365, "step": 731 }, { - "epoch": 2.79, - "learning_rate": 3.4351145038167944e-06, - "loss": 1.5388, + "epoch": 4.18, + "learning_rate": 2.908571428571429e-05, + "loss": 1.3352, "step": 732 }, { - "epoch": 2.8, - "learning_rate": 3.3715012722646315e-06, - "loss": 1.5161, + "epoch": 4.19, + "learning_rate": 2.905714285714286e-05, + "loss": 1.306, "step": 733 }, { - "epoch": 2.8, - "learning_rate": 3.307888040712468e-06, - "loss": 1.5903, + "epoch": 4.19, + "learning_rate": 2.9028571428571427e-05, + "loss": 1.3322, "step": 734 }, { - "epoch": 2.81, - "learning_rate": 3.2442748091603052e-06, - "loss": 1.5086, + "epoch": 4.2, + "learning_rate": 2.9e-05, + "loss": 1.2859, "step": 735 }, { - "epoch": 2.81, - "learning_rate": 3.1806615776081427e-06, - "loss": 1.5234, + "epoch": 4.21, + "learning_rate": 2.897142857142857e-05, + "loss": 1.3707, "step": 736 }, { - "epoch": 2.81, - "learning_rate": 3.1170483460559798e-06, - "loss": 1.4371, + "epoch": 4.21, + "learning_rate": 2.8942857142857144e-05, + "loss": 1.3309, "step": 737 }, { - "epoch": 2.82, - "learning_rate": 3.053435114503817e-06, - "loss": 1.4445, + "epoch": 4.22, + "learning_rate": 2.8914285714285714e-05, + "loss": 1.2728, "step": 738 }, { - "epoch": 2.82, - "learning_rate": 2.989821882951654e-06, - "loss": 1.4871, + "epoch": 4.22, + "learning_rate": 2.8885714285714288e-05, + "loss": 1.3801, "step": 739 }, { - "epoch": 2.82, - "learning_rate": 2.9262086513994914e-06, - "loss": 1.4505, + "epoch": 4.23, + "learning_rate": 2.885714285714286e-05, + "loss": 1.3491, "step": 740 }, { - "epoch": 2.83, - "learning_rate": 2.8625954198473285e-06, - "loss": 1.4307, + "epoch": 4.23, + "learning_rate": 2.8828571428571432e-05, + "loss": 1.3162, "step": 741 }, { - "epoch": 2.83, - "learning_rate": 2.7989821882951656e-06, - "loss": 1.5002, + "epoch": 4.24, + "learning_rate": 2.88e-05, + "loss": 1.2886, "step": 742 }, { - "epoch": 2.84, - "learning_rate": 2.735368956743003e-06, - "loss": 1.4684, + "epoch": 4.25, + "learning_rate": 2.8771428571428572e-05, + "loss": 1.2986, "step": 743 }, { - "epoch": 2.84, - "learning_rate": 2.6717557251908397e-06, - "loss": 1.4525, + "epoch": 4.25, + "learning_rate": 2.8742857142857143e-05, + "loss": 1.3365, "step": 744 }, { - "epoch": 2.84, - "learning_rate": 2.608142493638677e-06, - "loss": 1.4024, + "epoch": 4.26, + "learning_rate": 2.8714285714285716e-05, + "loss": 1.4407, "step": 745 }, { - "epoch": 2.85, - "learning_rate": 2.544529262086514e-06, - "loss": 1.4652, + "epoch": 4.26, + "learning_rate": 2.8685714285714286e-05, + "loss": 1.3204, "step": 746 }, { - "epoch": 2.85, - "learning_rate": 2.4809160305343513e-06, - "loss": 1.4308, + "epoch": 4.27, + "learning_rate": 2.865714285714286e-05, + "loss": 1.3159, "step": 747 }, { - "epoch": 2.85, - "learning_rate": 2.4173027989821884e-06, - "loss": 1.4592, + "epoch": 4.27, + "learning_rate": 2.8628571428571434e-05, + "loss": 1.2222, "step": 748 }, { - "epoch": 2.86, - "learning_rate": 2.3536895674300255e-06, - "loss": 1.3863, + "epoch": 4.28, + "learning_rate": 2.86e-05, + "loss": 1.2952, "step": 749 }, { - "epoch": 2.86, - "learning_rate": 2.2900763358778625e-06, - "loss": 1.4913, + "epoch": 4.29, + "learning_rate": 2.857142857142857e-05, + "loss": 1.3046, "step": 750 }, { - "epoch": 2.87, - "learning_rate": 2.2264631043257e-06, - "loss": 1.5188, + "epoch": 4.29, + "learning_rate": 2.8542857142857144e-05, + "loss": 1.3444, "step": 751 }, { - "epoch": 2.87, - "learning_rate": 2.162849872773537e-06, - "loss": 1.4573, + "epoch": 4.3, + "learning_rate": 2.8514285714285715e-05, + "loss": 1.4729, "step": 752 }, { - "epoch": 2.87, - "learning_rate": 2.099236641221374e-06, - "loss": 1.5268, + "epoch": 4.3, + "learning_rate": 2.848571428571429e-05, + "loss": 1.3795, "step": 753 }, { - "epoch": 2.88, - "learning_rate": 2.0356234096692112e-06, - "loss": 1.6501, + "epoch": 4.31, + "learning_rate": 2.845714285714286e-05, + "loss": 1.259, "step": 754 }, { - "epoch": 2.88, - "learning_rate": 1.9720101781170483e-06, - "loss": 1.6106, + "epoch": 4.31, + "learning_rate": 2.8428571428571432e-05, + "loss": 1.4053, "step": 755 }, { - "epoch": 2.89, - "learning_rate": 1.908396946564886e-06, - "loss": 1.463, + "epoch": 4.32, + "learning_rate": 2.84e-05, + "loss": 1.3372, "step": 756 }, { - "epoch": 2.89, - "learning_rate": 1.8447837150127227e-06, - "loss": 1.6244, + "epoch": 4.33, + "learning_rate": 2.837142857142857e-05, + "loss": 1.3185, "step": 757 }, { - "epoch": 2.89, - "learning_rate": 1.78117048346056e-06, - "loss": 1.4751, + "epoch": 4.33, + "learning_rate": 2.8342857142857143e-05, + "loss": 1.3754, "step": 758 }, { - "epoch": 2.9, - "learning_rate": 1.7175572519083972e-06, - "loss": 1.4866, + "epoch": 4.34, + "learning_rate": 2.8314285714285717e-05, + "loss": 1.3666, "step": 759 }, { - "epoch": 2.9, - "learning_rate": 1.653944020356234e-06, - "loss": 1.5802, + "epoch": 4.34, + "learning_rate": 2.8285714285714287e-05, + "loss": 1.2839, "step": 760 }, { - "epoch": 2.9, - "learning_rate": 1.5903307888040714e-06, - "loss": 1.4904, + "epoch": 4.35, + "learning_rate": 2.825714285714286e-05, + "loss": 1.3134, "step": 761 }, { - "epoch": 2.91, - "learning_rate": 1.5267175572519084e-06, - "loss": 1.3811, + "epoch": 4.35, + "learning_rate": 2.822857142857143e-05, + "loss": 1.3155, "step": 762 }, { - "epoch": 2.91, - "learning_rate": 1.4631043256997457e-06, - "loss": 1.4883, + "epoch": 4.36, + "learning_rate": 2.8199999999999998e-05, + "loss": 1.3315, "step": 763 }, { - "epoch": 2.92, - "learning_rate": 1.3994910941475828e-06, - "loss": 1.5247, + "epoch": 4.37, + "learning_rate": 2.817142857142857e-05, + "loss": 1.3228, "step": 764 }, { - "epoch": 2.92, - "learning_rate": 1.3358778625954198e-06, - "loss": 1.5334, + "epoch": 4.37, + "learning_rate": 2.814285714285714e-05, + "loss": 1.3601, "step": 765 }, { - "epoch": 2.92, - "learning_rate": 1.272264631043257e-06, - "loss": 1.514, + "epoch": 4.38, + "learning_rate": 2.8114285714285715e-05, + "loss": 1.3179, "step": 766 }, { - "epoch": 2.93, - "learning_rate": 1.2086513994910942e-06, - "loss": 1.4731, + "epoch": 4.38, + "learning_rate": 2.808571428571429e-05, + "loss": 1.3243, "step": 767 }, { - "epoch": 2.93, - "learning_rate": 1.1450381679389313e-06, - "loss": 1.486, + "epoch": 4.39, + "learning_rate": 2.805714285714286e-05, + "loss": 1.247, "step": 768 }, { - "epoch": 2.94, - "learning_rate": 1.0814249363867685e-06, - "loss": 1.5147, + "epoch": 4.39, + "learning_rate": 2.8028571428571433e-05, + "loss": 1.2989, "step": 769 }, { - "epoch": 2.94, - "learning_rate": 1.0178117048346056e-06, - "loss": 1.4505, + "epoch": 4.4, + "learning_rate": 2.8000000000000003e-05, + "loss": 1.3347, "step": 770 }, { - "epoch": 2.94, - "learning_rate": 9.54198473282443e-07, - "loss": 1.4967, + "epoch": 4.41, + "learning_rate": 2.797142857142857e-05, + "loss": 1.3562, "step": 771 }, { - "epoch": 2.95, - "learning_rate": 8.9058524173028e-07, - "loss": 1.5572, + "epoch": 4.41, + "learning_rate": 2.7942857142857143e-05, + "loss": 1.331, "step": 772 }, { - "epoch": 2.95, - "learning_rate": 8.26972010178117e-07, - "loss": 1.5879, + "epoch": 4.42, + "learning_rate": 2.7914285714285714e-05, + "loss": 1.3048, "step": 773 }, { - "epoch": 2.95, - "learning_rate": 7.633587786259542e-07, - "loss": 1.5198, + "epoch": 4.42, + "learning_rate": 2.7885714285714287e-05, + "loss": 1.3037, "step": 774 }, { - "epoch": 2.96, - "learning_rate": 6.997455470737914e-07, - "loss": 1.4886, + "epoch": 4.43, + "learning_rate": 2.785714285714286e-05, + "loss": 1.339, "step": 775 }, { - "epoch": 2.96, - "learning_rate": 6.361323155216285e-07, - "loss": 1.4612, + "epoch": 4.43, + "learning_rate": 2.782857142857143e-05, + "loss": 1.3303, "step": 776 }, { - "epoch": 2.97, - "learning_rate": 5.725190839694656e-07, - "loss": 1.5806, + "epoch": 4.44, + "learning_rate": 2.7800000000000005e-05, + "loss": 1.384, "step": 777 }, { - "epoch": 2.97, - "learning_rate": 5.089058524173028e-07, - "loss": 1.5416, + "epoch": 4.45, + "learning_rate": 2.7771428571428572e-05, + "loss": 1.4252, "step": 778 }, { - "epoch": 2.97, - "learning_rate": 4.4529262086514e-07, - "loss": 1.5734, + "epoch": 4.45, + "learning_rate": 2.7742857142857142e-05, + "loss": 1.3753, "step": 779 }, { - "epoch": 2.98, - "learning_rate": 3.816793893129771e-07, - "loss": 1.5684, + "epoch": 4.46, + "learning_rate": 2.7714285714285716e-05, + "loss": 1.2363, "step": 780 }, { - "epoch": 2.98, - "learning_rate": 3.1806615776081423e-07, - "loss": 1.4813, + "epoch": 4.46, + "learning_rate": 2.7685714285714286e-05, + "loss": 1.2956, "step": 781 }, { - "epoch": 2.98, - "learning_rate": 2.544529262086514e-07, - "loss": 1.5316, + "epoch": 4.47, + "learning_rate": 2.765714285714286e-05, + "loss": 1.3182, "step": 782 }, { - "epoch": 2.99, - "learning_rate": 1.9083969465648855e-07, - "loss": 1.4848, + "epoch": 4.47, + "learning_rate": 2.762857142857143e-05, + "loss": 1.3055, "step": 783 }, { - "epoch": 2.99, - "learning_rate": 1.272264631043257e-07, - "loss": 1.5582, + "epoch": 4.48, + "learning_rate": 2.7600000000000003e-05, + "loss": 1.3547, "step": 784 }, { - "epoch": 3.0, - "learning_rate": 6.361323155216285e-08, - "loss": 1.4645, + "epoch": 4.49, + "learning_rate": 2.757142857142857e-05, + "loss": 1.2879, "step": 785 }, { - "epoch": 3.0, - "learning_rate": 0.0, - "loss": 1.3783, + "epoch": 4.49, + "learning_rate": 2.7542857142857144e-05, + "loss": 1.3005, "step": 786 }, { - "epoch": 3.0, - "step": 786, - "total_flos": 1.2699738389348352e+16, - "train_loss": 1.6923205314095087, - "train_runtime": 822.2959, - "train_samples_per_second": 22.879, - "train_steps_per_second": 0.956 + "epoch": 4.5, + "learning_rate": 2.7514285714285714e-05, + "loss": 1.3595, + "step": 787 + }, + { + "epoch": 4.5, + "learning_rate": 2.7485714285714288e-05, + "loss": 1.3856, + "step": 788 + }, + { + "epoch": 4.51, + "learning_rate": 2.7457142857142858e-05, + "loss": 1.3491, + "step": 789 + }, + { + "epoch": 4.51, + "learning_rate": 2.742857142857143e-05, + "loss": 1.3044, + "step": 790 + }, + { + "epoch": 4.52, + "learning_rate": 2.7400000000000002e-05, + "loss": 1.2415, + "step": 791 + }, + { + "epoch": 4.53, + "learning_rate": 2.737142857142857e-05, + "loss": 1.3315, + "step": 792 + }, + { + "epoch": 4.53, + "learning_rate": 2.7342857142857142e-05, + "loss": 1.3962, + "step": 793 + }, + { + "epoch": 4.54, + "learning_rate": 2.7314285714285716e-05, + "loss": 1.3741, + "step": 794 + }, + { + "epoch": 4.54, + "learning_rate": 2.7285714285714286e-05, + "loss": 1.3343, + "step": 795 + }, + { + "epoch": 4.55, + "learning_rate": 2.725714285714286e-05, + "loss": 1.3336, + "step": 796 + }, + { + "epoch": 4.55, + "learning_rate": 2.722857142857143e-05, + "loss": 1.4101, + "step": 797 + }, + { + "epoch": 4.56, + "learning_rate": 2.7200000000000004e-05, + "loss": 1.3092, + "step": 798 + }, + { + "epoch": 4.57, + "learning_rate": 2.7171428571428574e-05, + "loss": 1.3327, + "step": 799 + }, + { + "epoch": 4.57, + "learning_rate": 2.714285714285714e-05, + "loss": 1.3595, + "step": 800 + }, + { + "epoch": 4.58, + "learning_rate": 2.7114285714285715e-05, + "loss": 1.3042, + "step": 801 + }, + { + "epoch": 4.58, + "learning_rate": 2.7085714285714285e-05, + "loss": 1.2747, + "step": 802 + }, + { + "epoch": 4.59, + "learning_rate": 2.705714285714286e-05, + "loss": 1.2625, + "step": 803 + }, + { + "epoch": 4.59, + "learning_rate": 2.7028571428571432e-05, + "loss": 1.3316, + "step": 804 + }, + { + "epoch": 4.6, + "learning_rate": 2.7000000000000002e-05, + "loss": 1.3095, + "step": 805 + }, + { + "epoch": 4.61, + "learning_rate": 2.6971428571428576e-05, + "loss": 1.3831, + "step": 806 + }, + { + "epoch": 4.61, + "learning_rate": 2.6942857142857143e-05, + "loss": 1.3367, + "step": 807 + }, + { + "epoch": 4.62, + "learning_rate": 2.6914285714285713e-05, + "loss": 1.3466, + "step": 808 + }, + { + "epoch": 4.62, + "learning_rate": 2.6885714285714287e-05, + "loss": 1.2935, + "step": 809 + }, + { + "epoch": 4.63, + "learning_rate": 2.6857142857142857e-05, + "loss": 1.3602, + "step": 810 + }, + { + "epoch": 4.63, + "learning_rate": 2.682857142857143e-05, + "loss": 1.3846, + "step": 811 + }, + { + "epoch": 4.64, + "learning_rate": 2.6800000000000004e-05, + "loss": 1.2971, + "step": 812 + }, + { + "epoch": 4.65, + "learning_rate": 2.6771428571428575e-05, + "loss": 1.2417, + "step": 813 + }, + { + "epoch": 4.65, + "learning_rate": 2.674285714285714e-05, + "loss": 1.3028, + "step": 814 + }, + { + "epoch": 4.66, + "learning_rate": 2.6714285714285715e-05, + "loss": 1.3421, + "step": 815 + }, + { + "epoch": 4.66, + "learning_rate": 2.6685714285714285e-05, + "loss": 1.3303, + "step": 816 + }, + { + "epoch": 4.67, + "learning_rate": 2.665714285714286e-05, + "loss": 1.4346, + "step": 817 + }, + { + "epoch": 4.67, + "learning_rate": 2.662857142857143e-05, + "loss": 1.3082, + "step": 818 + }, + { + "epoch": 4.68, + "learning_rate": 2.6600000000000003e-05, + "loss": 1.2856, + "step": 819 + }, + { + "epoch": 4.69, + "learning_rate": 2.6571428571428576e-05, + "loss": 1.2811, + "step": 820 + }, + { + "epoch": 4.69, + "learning_rate": 2.654285714285714e-05, + "loss": 1.4053, + "step": 821 + }, + { + "epoch": 4.7, + "learning_rate": 2.6514285714285714e-05, + "loss": 1.3435, + "step": 822 + }, + { + "epoch": 4.7, + "learning_rate": 2.6485714285714287e-05, + "loss": 1.2782, + "step": 823 + }, + { + "epoch": 4.71, + "learning_rate": 2.6457142857142857e-05, + "loss": 1.3065, + "step": 824 + }, + { + "epoch": 4.71, + "learning_rate": 2.642857142857143e-05, + "loss": 1.3341, + "step": 825 + }, + { + "epoch": 4.72, + "learning_rate": 2.64e-05, + "loss": 1.2693, + "step": 826 + }, + { + "epoch": 4.73, + "learning_rate": 2.6371428571428575e-05, + "loss": 1.2947, + "step": 827 + }, + { + "epoch": 4.73, + "learning_rate": 2.6342857142857142e-05, + "loss": 1.3146, + "step": 828 + }, + { + "epoch": 4.74, + "learning_rate": 2.6314285714285712e-05, + "loss": 1.289, + "step": 829 + }, + { + "epoch": 4.74, + "learning_rate": 2.6285714285714286e-05, + "loss": 1.3284, + "step": 830 + }, + { + "epoch": 4.75, + "learning_rate": 2.625714285714286e-05, + "loss": 1.2655, + "step": 831 + }, + { + "epoch": 4.75, + "learning_rate": 2.622857142857143e-05, + "loss": 1.4469, + "step": 832 + }, + { + "epoch": 4.76, + "learning_rate": 2.6200000000000003e-05, + "loss": 1.2999, + "step": 833 + }, + { + "epoch": 4.77, + "learning_rate": 2.6171428571428574e-05, + "loss": 1.3227, + "step": 834 + }, + { + "epoch": 4.77, + "learning_rate": 2.6142857142857147e-05, + "loss": 1.337, + "step": 835 + }, + { + "epoch": 4.78, + "learning_rate": 2.6114285714285714e-05, + "loss": 1.3279, + "step": 836 + }, + { + "epoch": 4.78, + "learning_rate": 2.6085714285714284e-05, + "loss": 1.291, + "step": 837 + }, + { + "epoch": 4.79, + "learning_rate": 2.6057142857142858e-05, + "loss": 1.4128, + "step": 838 + }, + { + "epoch": 4.79, + "learning_rate": 2.602857142857143e-05, + "loss": 1.3253, + "step": 839 + }, + { + "epoch": 4.8, + "learning_rate": 2.6000000000000002e-05, + "loss": 1.343, + "step": 840 + }, + { + "epoch": 4.81, + "learning_rate": 2.5971428571428575e-05, + "loss": 1.2943, + "step": 841 + }, + { + "epoch": 4.81, + "learning_rate": 2.5942857142857146e-05, + "loss": 1.387, + "step": 842 + }, + { + "epoch": 4.82, + "learning_rate": 2.5914285714285713e-05, + "loss": 1.2921, + "step": 843 + }, + { + "epoch": 4.82, + "learning_rate": 2.5885714285714286e-05, + "loss": 1.4114, + "step": 844 + }, + { + "epoch": 4.83, + "learning_rate": 2.5857142857142856e-05, + "loss": 1.4293, + "step": 845 + }, + { + "epoch": 4.83, + "learning_rate": 2.582857142857143e-05, + "loss": 1.2976, + "step": 846 + }, + { + "epoch": 4.84, + "learning_rate": 2.58e-05, + "loss": 1.3084, + "step": 847 + }, + { + "epoch": 4.85, + "learning_rate": 2.5771428571428574e-05, + "loss": 1.3507, + "step": 848 + }, + { + "epoch": 4.85, + "learning_rate": 2.5742857142857148e-05, + "loss": 1.3024, + "step": 849 + }, + { + "epoch": 4.86, + "learning_rate": 2.5714285714285714e-05, + "loss": 1.2941, + "step": 850 + }, + { + "epoch": 4.86, + "learning_rate": 2.5685714285714285e-05, + "loss": 1.3075, + "step": 851 + }, + { + "epoch": 4.87, + "learning_rate": 2.565714285714286e-05, + "loss": 1.2879, + "step": 852 + }, + { + "epoch": 4.87, + "learning_rate": 2.562857142857143e-05, + "loss": 1.3169, + "step": 853 + }, + { + "epoch": 4.88, + "learning_rate": 2.5600000000000002e-05, + "loss": 1.3521, + "step": 854 + }, + { + "epoch": 4.89, + "learning_rate": 2.5571428571428572e-05, + "loss": 1.3113, + "step": 855 + }, + { + "epoch": 4.89, + "learning_rate": 2.5542857142857146e-05, + "loss": 1.3631, + "step": 856 + }, + { + "epoch": 4.9, + "learning_rate": 2.5514285714285713e-05, + "loss": 1.3454, + "step": 857 + }, + { + "epoch": 4.9, + "learning_rate": 2.5485714285714287e-05, + "loss": 1.3585, + "step": 858 + }, + { + "epoch": 4.91, + "learning_rate": 2.5457142857142857e-05, + "loss": 1.4251, + "step": 859 + }, + { + "epoch": 4.91, + "learning_rate": 2.542857142857143e-05, + "loss": 1.4008, + "step": 860 + }, + { + "epoch": 4.92, + "learning_rate": 2.54e-05, + "loss": 1.3459, + "step": 861 + }, + { + "epoch": 4.93, + "learning_rate": 2.5371428571428574e-05, + "loss": 1.3039, + "step": 862 + }, + { + "epoch": 4.93, + "learning_rate": 2.5342857142857145e-05, + "loss": 1.3746, + "step": 863 + }, + { + "epoch": 4.94, + "learning_rate": 2.5314285714285718e-05, + "loss": 1.3884, + "step": 864 + }, + { + "epoch": 4.94, + "learning_rate": 2.5285714285714285e-05, + "loss": 1.351, + "step": 865 + }, + { + "epoch": 4.95, + "learning_rate": 2.5257142857142855e-05, + "loss": 1.2113, + "step": 866 + }, + { + "epoch": 4.95, + "learning_rate": 2.522857142857143e-05, + "loss": 1.3051, + "step": 867 + }, + { + "epoch": 4.96, + "learning_rate": 2.5200000000000003e-05, + "loss": 1.3643, + "step": 868 + }, + { + "epoch": 4.97, + "learning_rate": 2.5171428571428573e-05, + "loss": 1.2819, + "step": 869 + }, + { + "epoch": 4.97, + "learning_rate": 2.5142857142857147e-05, + "loss": 1.3283, + "step": 870 + }, + { + "epoch": 4.98, + "learning_rate": 2.5114285714285717e-05, + "loss": 1.3209, + "step": 871 + }, + { + "epoch": 4.98, + "learning_rate": 2.5085714285714284e-05, + "loss": 1.3381, + "step": 872 + }, + { + "epoch": 4.99, + "learning_rate": 2.5057142857142857e-05, + "loss": 1.3348, + "step": 873 + }, + { + "epoch": 4.99, + "learning_rate": 2.5028571428571428e-05, + "loss": 1.3479, + "step": 874 + }, + { + "epoch": 5.0, + "learning_rate": 2.5e-05, + "loss": 1.3556, + "step": 875 + }, + { + "epoch": 5.01, + "learning_rate": 2.4971428571428575e-05, + "loss": 1.2632, + "step": 876 + }, + { + "epoch": 5.01, + "learning_rate": 2.4942857142857142e-05, + "loss": 1.2726, + "step": 877 + }, + { + "epoch": 5.02, + "learning_rate": 2.4914285714285715e-05, + "loss": 1.2451, + "step": 878 + }, + { + "epoch": 5.02, + "learning_rate": 2.4885714285714286e-05, + "loss": 1.2124, + "step": 879 + }, + { + "epoch": 5.03, + "learning_rate": 2.485714285714286e-05, + "loss": 1.2437, + "step": 880 + }, + { + "epoch": 5.03, + "learning_rate": 2.482857142857143e-05, + "loss": 1.259, + "step": 881 + }, + { + "epoch": 5.04, + "learning_rate": 2.48e-05, + "loss": 1.3374, + "step": 882 + }, + { + "epoch": 5.05, + "learning_rate": 2.4771428571428573e-05, + "loss": 1.2851, + "step": 883 + }, + { + "epoch": 5.05, + "learning_rate": 2.4742857142857147e-05, + "loss": 1.3082, + "step": 884 + }, + { + "epoch": 5.06, + "learning_rate": 2.4714285714285714e-05, + "loss": 1.3048, + "step": 885 + }, + { + "epoch": 5.06, + "learning_rate": 2.4685714285714288e-05, + "loss": 1.2108, + "step": 886 + }, + { + "epoch": 5.07, + "learning_rate": 2.4657142857142858e-05, + "loss": 1.296, + "step": 887 + }, + { + "epoch": 5.07, + "learning_rate": 2.4628571428571428e-05, + "loss": 1.2827, + "step": 888 + }, + { + "epoch": 5.08, + "learning_rate": 2.46e-05, + "loss": 1.271, + "step": 889 + }, + { + "epoch": 5.09, + "learning_rate": 2.4571428571428572e-05, + "loss": 1.3433, + "step": 890 + }, + { + "epoch": 5.09, + "learning_rate": 2.4542857142857146e-05, + "loss": 1.2282, + "step": 891 + }, + { + "epoch": 5.1, + "learning_rate": 2.4514285714285716e-05, + "loss": 1.3487, + "step": 892 + }, + { + "epoch": 5.1, + "learning_rate": 2.4485714285714286e-05, + "loss": 1.2905, + "step": 893 + }, + { + "epoch": 5.11, + "learning_rate": 2.445714285714286e-05, + "loss": 1.3112, + "step": 894 + }, + { + "epoch": 5.11, + "learning_rate": 2.442857142857143e-05, + "loss": 1.32, + "step": 895 + }, + { + "epoch": 5.12, + "learning_rate": 2.44e-05, + "loss": 1.3555, + "step": 896 + }, + { + "epoch": 5.13, + "learning_rate": 2.4371428571428574e-05, + "loss": 1.231, + "step": 897 + }, + { + "epoch": 5.13, + "learning_rate": 2.4342857142857144e-05, + "loss": 1.3232, + "step": 898 + }, + { + "epoch": 5.14, + "learning_rate": 2.4314285714285714e-05, + "loss": 1.3095, + "step": 899 + }, + { + "epoch": 5.14, + "learning_rate": 2.4285714285714288e-05, + "loss": 1.2877, + "step": 900 + }, + { + "epoch": 5.15, + "learning_rate": 2.4257142857142858e-05, + "loss": 1.2923, + "step": 901 + }, + { + "epoch": 5.15, + "learning_rate": 2.4228571428571432e-05, + "loss": 1.1921, + "step": 902 + }, + { + "epoch": 5.16, + "learning_rate": 2.4200000000000002e-05, + "loss": 1.2367, + "step": 903 + }, + { + "epoch": 5.17, + "learning_rate": 2.4171428571428572e-05, + "loss": 1.3188, + "step": 904 + }, + { + "epoch": 5.17, + "learning_rate": 2.4142857142857146e-05, + "loss": 1.2978, + "step": 905 + }, + { + "epoch": 5.18, + "learning_rate": 2.4114285714285713e-05, + "loss": 1.3123, + "step": 906 + }, + { + "epoch": 5.18, + "learning_rate": 2.4085714285714286e-05, + "loss": 1.2387, + "step": 907 + }, + { + "epoch": 5.19, + "learning_rate": 2.405714285714286e-05, + "loss": 1.2904, + "step": 908 + }, + { + "epoch": 5.19, + "learning_rate": 2.402857142857143e-05, + "loss": 1.258, + "step": 909 + }, + { + "epoch": 5.2, + "learning_rate": 2.4e-05, + "loss": 1.2735, + "step": 910 + }, + { + "epoch": 5.21, + "learning_rate": 2.397142857142857e-05, + "loss": 1.2948, + "step": 911 + }, + { + "epoch": 5.21, + "learning_rate": 2.3942857142857144e-05, + "loss": 1.3037, + "step": 912 + }, + { + "epoch": 5.22, + "learning_rate": 2.3914285714285715e-05, + "loss": 1.3115, + "step": 913 + }, + { + "epoch": 5.22, + "learning_rate": 2.3885714285714285e-05, + "loss": 1.3237, + "step": 914 + }, + { + "epoch": 5.23, + "learning_rate": 2.385714285714286e-05, + "loss": 1.3139, + "step": 915 + }, + { + "epoch": 5.23, + "learning_rate": 2.3828571428571432e-05, + "loss": 1.3182, + "step": 916 + }, + { + "epoch": 5.24, + "learning_rate": 2.38e-05, + "loss": 1.2913, + "step": 917 + }, + { + "epoch": 5.25, + "learning_rate": 2.3771428571428573e-05, + "loss": 1.2651, + "step": 918 + }, + { + "epoch": 5.25, + "learning_rate": 2.3742857142857143e-05, + "loss": 1.3385, + "step": 919 + }, + { + "epoch": 5.26, + "learning_rate": 2.3714285714285717e-05, + "loss": 1.3538, + "step": 920 + }, + { + "epoch": 5.26, + "learning_rate": 2.3685714285714287e-05, + "loss": 1.3486, + "step": 921 + }, + { + "epoch": 5.27, + "learning_rate": 2.3657142857142857e-05, + "loss": 1.2768, + "step": 922 + }, + { + "epoch": 5.27, + "learning_rate": 2.362857142857143e-05, + "loss": 1.2944, + "step": 923 + }, + { + "epoch": 5.28, + "learning_rate": 2.36e-05, + "loss": 1.348, + "step": 924 + }, + { + "epoch": 5.29, + "learning_rate": 2.357142857142857e-05, + "loss": 1.3669, + "step": 925 + }, + { + "epoch": 5.29, + "learning_rate": 2.3542857142857145e-05, + "loss": 1.2474, + "step": 926 + }, + { + "epoch": 5.3, + "learning_rate": 2.3514285714285715e-05, + "loss": 1.4053, + "step": 927 + }, + { + "epoch": 5.3, + "learning_rate": 2.3485714285714285e-05, + "loss": 1.287, + "step": 928 + }, + { + "epoch": 5.31, + "learning_rate": 2.345714285714286e-05, + "loss": 1.2992, + "step": 929 + }, + { + "epoch": 5.31, + "learning_rate": 2.342857142857143e-05, + "loss": 1.277, + "step": 930 + }, + { + "epoch": 5.32, + "learning_rate": 2.3400000000000003e-05, + "loss": 1.2658, + "step": 931 + }, + { + "epoch": 5.33, + "learning_rate": 2.3371428571428573e-05, + "loss": 1.2591, + "step": 932 + }, + { + "epoch": 5.33, + "learning_rate": 2.3342857142857143e-05, + "loss": 1.2843, + "step": 933 + }, + { + "epoch": 5.34, + "learning_rate": 2.3314285714285717e-05, + "loss": 1.2686, + "step": 934 + }, + { + "epoch": 5.34, + "learning_rate": 2.3285714285714287e-05, + "loss": 1.2783, + "step": 935 + }, + { + "epoch": 5.35, + "learning_rate": 2.3257142857142858e-05, + "loss": 1.2888, + "step": 936 + }, + { + "epoch": 5.35, + "learning_rate": 2.322857142857143e-05, + "loss": 1.3207, + "step": 937 + }, + { + "epoch": 5.36, + "learning_rate": 2.32e-05, + "loss": 1.2825, + "step": 938 + }, + { + "epoch": 5.37, + "learning_rate": 2.3171428571428572e-05, + "loss": 1.2808, + "step": 939 + }, + { + "epoch": 5.37, + "learning_rate": 2.3142857142857145e-05, + "loss": 1.2387, + "step": 940 + }, + { + "epoch": 5.38, + "learning_rate": 2.3114285714285716e-05, + "loss": 1.3441, + "step": 941 + }, + { + "epoch": 5.38, + "learning_rate": 2.3085714285714286e-05, + "loss": 1.3292, + "step": 942 + }, + { + "epoch": 5.39, + "learning_rate": 2.3057142857142856e-05, + "loss": 1.2735, + "step": 943 + }, + { + "epoch": 5.39, + "learning_rate": 2.302857142857143e-05, + "loss": 1.2249, + "step": 944 + }, + { + "epoch": 5.4, + "learning_rate": 2.3000000000000003e-05, + "loss": 1.2249, + "step": 945 + }, + { + "epoch": 5.41, + "learning_rate": 2.297142857142857e-05, + "loss": 1.1812, + "step": 946 + }, + { + "epoch": 5.41, + "learning_rate": 2.2942857142857144e-05, + "loss": 1.3092, + "step": 947 + }, + { + "epoch": 5.42, + "learning_rate": 2.2914285714285718e-05, + "loss": 1.3148, + "step": 948 + }, + { + "epoch": 5.42, + "learning_rate": 2.2885714285714288e-05, + "loss": 1.1908, + "step": 949 + }, + { + "epoch": 5.43, + "learning_rate": 2.2857142857142858e-05, + "loss": 1.2542, + "step": 950 + }, + { + "epoch": 5.43, + "learning_rate": 2.2828571428571428e-05, + "loss": 1.1672, + "step": 951 + }, + { + "epoch": 5.44, + "learning_rate": 2.2800000000000002e-05, + "loss": 1.2224, + "step": 952 + }, + { + "epoch": 5.45, + "learning_rate": 2.2771428571428572e-05, + "loss": 1.2576, + "step": 953 + }, + { + "epoch": 5.45, + "learning_rate": 2.2742857142857142e-05, + "loss": 1.2776, + "step": 954 + }, + { + "epoch": 5.46, + "learning_rate": 2.2714285714285716e-05, + "loss": 1.3872, + "step": 955 + }, + { + "epoch": 5.46, + "learning_rate": 2.2685714285714286e-05, + "loss": 1.2355, + "step": 956 + }, + { + "epoch": 5.47, + "learning_rate": 2.2657142857142857e-05, + "loss": 1.2203, + "step": 957 + }, + { + "epoch": 5.47, + "learning_rate": 2.262857142857143e-05, + "loss": 1.2918, + "step": 958 + }, + { + "epoch": 5.48, + "learning_rate": 2.26e-05, + "loss": 1.3314, + "step": 959 + }, + { + "epoch": 5.49, + "learning_rate": 2.257142857142857e-05, + "loss": 1.23, + "step": 960 + }, + { + "epoch": 5.49, + "learning_rate": 2.2542857142857144e-05, + "loss": 1.2755, + "step": 961 + }, + { + "epoch": 5.5, + "learning_rate": 2.2514285714285715e-05, + "loss": 1.2348, + "step": 962 + }, + { + "epoch": 5.5, + "learning_rate": 2.2485714285714288e-05, + "loss": 1.3726, + "step": 963 + }, + { + "epoch": 5.51, + "learning_rate": 2.245714285714286e-05, + "loss": 1.2435, + "step": 964 + }, + { + "epoch": 5.51, + "learning_rate": 2.242857142857143e-05, + "loss": 1.3648, + "step": 965 + }, + { + "epoch": 5.52, + "learning_rate": 2.2400000000000002e-05, + "loss": 1.3117, + "step": 966 + }, + { + "epoch": 5.53, + "learning_rate": 2.2371428571428573e-05, + "loss": 1.2374, + "step": 967 + }, + { + "epoch": 5.53, + "learning_rate": 2.2342857142857143e-05, + "loss": 1.2719, + "step": 968 + }, + { + "epoch": 5.54, + "learning_rate": 2.2314285714285717e-05, + "loss": 1.178, + "step": 969 + }, + { + "epoch": 5.54, + "learning_rate": 2.2285714285714287e-05, + "loss": 1.2103, + "step": 970 + }, + { + "epoch": 5.55, + "learning_rate": 2.2257142857142857e-05, + "loss": 1.2689, + "step": 971 + }, + { + "epoch": 5.55, + "learning_rate": 2.222857142857143e-05, + "loss": 1.2282, + "step": 972 + }, + { + "epoch": 5.56, + "learning_rate": 2.22e-05, + "loss": 1.2303, + "step": 973 + }, + { + "epoch": 5.57, + "learning_rate": 2.2171428571428575e-05, + "loss": 1.3065, + "step": 974 + }, + { + "epoch": 5.57, + "learning_rate": 2.214285714285714e-05, + "loss": 1.2426, + "step": 975 + }, + { + "epoch": 5.58, + "learning_rate": 2.2114285714285715e-05, + "loss": 1.2496, + "step": 976 + }, + { + "epoch": 5.58, + "learning_rate": 2.208571428571429e-05, + "loss": 1.2837, + "step": 977 + }, + { + "epoch": 5.59, + "learning_rate": 2.205714285714286e-05, + "loss": 1.2821, + "step": 978 + }, + { + "epoch": 5.59, + "learning_rate": 2.202857142857143e-05, + "loss": 1.2269, + "step": 979 + }, + { + "epoch": 5.6, + "learning_rate": 2.2000000000000003e-05, + "loss": 1.2415, + "step": 980 + }, + { + "epoch": 5.61, + "learning_rate": 2.1971428571428573e-05, + "loss": 1.2911, + "step": 981 + }, + { + "epoch": 5.61, + "learning_rate": 2.1942857142857143e-05, + "loss": 1.2307, + "step": 982 + }, + { + "epoch": 5.62, + "learning_rate": 2.1914285714285714e-05, + "loss": 1.2983, + "step": 983 + }, + { + "epoch": 5.62, + "learning_rate": 2.1885714285714287e-05, + "loss": 1.3145, + "step": 984 + }, + { + "epoch": 5.63, + "learning_rate": 2.185714285714286e-05, + "loss": 1.2768, + "step": 985 + }, + { + "epoch": 5.63, + "learning_rate": 2.1828571428571428e-05, + "loss": 1.2651, + "step": 986 + }, + { + "epoch": 5.64, + "learning_rate": 2.18e-05, + "loss": 1.296, + "step": 987 + }, + { + "epoch": 5.65, + "learning_rate": 2.177142857142857e-05, + "loss": 1.2726, + "step": 988 + }, + { + "epoch": 5.65, + "learning_rate": 2.1742857142857142e-05, + "loss": 1.2601, + "step": 989 + }, + { + "epoch": 5.66, + "learning_rate": 2.1714285714285715e-05, + "loss": 1.261, + "step": 990 + }, + { + "epoch": 5.66, + "learning_rate": 2.1685714285714286e-05, + "loss": 1.2642, + "step": 991 + }, + { + "epoch": 5.67, + "learning_rate": 2.165714285714286e-05, + "loss": 1.2386, + "step": 992 + }, + { + "epoch": 5.67, + "learning_rate": 2.162857142857143e-05, + "loss": 1.2405, + "step": 993 + }, + { + "epoch": 5.68, + "learning_rate": 2.16e-05, + "loss": 1.3141, + "step": 994 + }, + { + "epoch": 5.69, + "learning_rate": 2.1571428571428574e-05, + "loss": 1.2544, + "step": 995 + }, + { + "epoch": 5.69, + "learning_rate": 2.1542857142857144e-05, + "loss": 1.3342, + "step": 996 + }, + { + "epoch": 5.7, + "learning_rate": 2.1514285714285714e-05, + "loss": 1.2918, + "step": 997 + }, + { + "epoch": 5.7, + "learning_rate": 2.1485714285714288e-05, + "loss": 1.2641, + "step": 998 + }, + { + "epoch": 5.71, + "learning_rate": 2.1457142857142858e-05, + "loss": 1.24, + "step": 999 + }, + { + "epoch": 5.71, + "learning_rate": 2.1428571428571428e-05, + "loss": 1.3657, + "step": 1000 + }, + { + "epoch": 5.72, + "learning_rate": 2.1400000000000002e-05, + "loss": 1.2104, + "step": 1001 + }, + { + "epoch": 5.73, + "learning_rate": 2.1371428571428572e-05, + "loss": 1.3424, + "step": 1002 + }, + { + "epoch": 5.73, + "learning_rate": 2.1342857142857146e-05, + "loss": 1.2253, + "step": 1003 + }, + { + "epoch": 5.74, + "learning_rate": 2.1314285714285716e-05, + "loss": 1.26, + "step": 1004 + }, + { + "epoch": 5.74, + "learning_rate": 2.1285714285714286e-05, + "loss": 1.3178, + "step": 1005 + }, + { + "epoch": 5.75, + "learning_rate": 2.125714285714286e-05, + "loss": 1.3444, + "step": 1006 + }, + { + "epoch": 5.75, + "learning_rate": 2.1228571428571427e-05, + "loss": 1.3016, + "step": 1007 + }, + { + "epoch": 5.76, + "learning_rate": 2.12e-05, + "loss": 1.3347, + "step": 1008 + }, + { + "epoch": 5.77, + "learning_rate": 2.1171428571428574e-05, + "loss": 1.2548, + "step": 1009 + }, + { + "epoch": 5.77, + "learning_rate": 2.1142857142857144e-05, + "loss": 1.2495, + "step": 1010 + }, + { + "epoch": 5.78, + "learning_rate": 2.1114285714285714e-05, + "loss": 1.2293, + "step": 1011 + }, + { + "epoch": 5.78, + "learning_rate": 2.1085714285714288e-05, + "loss": 1.2683, + "step": 1012 + }, + { + "epoch": 5.79, + "learning_rate": 2.105714285714286e-05, + "loss": 1.2343, + "step": 1013 + }, + { + "epoch": 5.79, + "learning_rate": 2.1028571428571432e-05, + "loss": 1.2642, + "step": 1014 + }, + { + "epoch": 5.8, + "learning_rate": 2.1e-05, + "loss": 1.2858, + "step": 1015 + }, + { + "epoch": 5.81, + "learning_rate": 2.0971428571428572e-05, + "loss": 1.2633, + "step": 1016 + }, + { + "epoch": 5.81, + "learning_rate": 2.0942857142857146e-05, + "loss": 1.2989, + "step": 1017 + }, + { + "epoch": 5.82, + "learning_rate": 2.0914285714285713e-05, + "loss": 1.2302, + "step": 1018 + }, + { + "epoch": 5.82, + "learning_rate": 2.0885714285714287e-05, + "loss": 1.2577, + "step": 1019 + }, + { + "epoch": 5.83, + "learning_rate": 2.0857142857142857e-05, + "loss": 1.2848, + "step": 1020 + }, + { + "epoch": 5.83, + "learning_rate": 2.082857142857143e-05, + "loss": 1.3636, + "step": 1021 + }, + { + "epoch": 5.84, + "learning_rate": 2.08e-05, + "loss": 1.2772, + "step": 1022 + }, + { + "epoch": 5.85, + "learning_rate": 2.077142857142857e-05, + "loss": 1.2869, + "step": 1023 + }, + { + "epoch": 5.85, + "learning_rate": 2.0742857142857145e-05, + "loss": 1.2698, + "step": 1024 + }, + { + "epoch": 5.86, + "learning_rate": 2.0714285714285718e-05, + "loss": 1.2281, + "step": 1025 + }, + { + "epoch": 5.86, + "learning_rate": 2.0685714285714285e-05, + "loss": 1.2564, + "step": 1026 + }, + { + "epoch": 5.87, + "learning_rate": 2.065714285714286e-05, + "loss": 1.32, + "step": 1027 + }, + { + "epoch": 5.87, + "learning_rate": 2.062857142857143e-05, + "loss": 1.2551, + "step": 1028 + }, + { + "epoch": 5.88, + "learning_rate": 2.06e-05, + "loss": 1.2259, + "step": 1029 + }, + { + "epoch": 5.89, + "learning_rate": 2.0571428571428573e-05, + "loss": 1.2862, + "step": 1030 + }, + { + "epoch": 5.89, + "learning_rate": 2.0542857142857143e-05, + "loss": 1.3579, + "step": 1031 + }, + { + "epoch": 5.9, + "learning_rate": 2.0514285714285717e-05, + "loss": 1.2287, + "step": 1032 + }, + { + "epoch": 5.9, + "learning_rate": 2.0485714285714287e-05, + "loss": 1.2708, + "step": 1033 + }, + { + "epoch": 5.91, + "learning_rate": 2.0457142857142857e-05, + "loss": 1.2862, + "step": 1034 + }, + { + "epoch": 5.91, + "learning_rate": 2.042857142857143e-05, + "loss": 1.223, + "step": 1035 + }, + { + "epoch": 5.92, + "learning_rate": 2.04e-05, + "loss": 1.3186, + "step": 1036 + }, + { + "epoch": 5.93, + "learning_rate": 2.037142857142857e-05, + "loss": 1.2676, + "step": 1037 + }, + { + "epoch": 5.93, + "learning_rate": 2.0342857142857145e-05, + "loss": 1.251, + "step": 1038 + }, + { + "epoch": 5.94, + "learning_rate": 2.0314285714285715e-05, + "loss": 1.2164, + "step": 1039 + }, + { + "epoch": 5.94, + "learning_rate": 2.0285714285714286e-05, + "loss": 1.2819, + "step": 1040 + }, + { + "epoch": 5.95, + "learning_rate": 2.025714285714286e-05, + "loss": 1.3483, + "step": 1041 + }, + { + "epoch": 5.95, + "learning_rate": 2.022857142857143e-05, + "loss": 1.2706, + "step": 1042 + }, + { + "epoch": 5.96, + "learning_rate": 2.0200000000000003e-05, + "loss": 1.3235, + "step": 1043 + }, + { + "epoch": 5.97, + "learning_rate": 2.0171428571428573e-05, + "loss": 1.261, + "step": 1044 + }, + { + "epoch": 5.97, + "learning_rate": 2.0142857142857144e-05, + "loss": 1.2001, + "step": 1045 + }, + { + "epoch": 5.98, + "learning_rate": 2.0114285714285717e-05, + "loss": 1.2315, + "step": 1046 + }, + { + "epoch": 5.98, + "learning_rate": 2.0085714285714284e-05, + "loss": 1.2189, + "step": 1047 + }, + { + "epoch": 5.99, + "learning_rate": 2.0057142857142858e-05, + "loss": 1.3158, + "step": 1048 + }, + { + "epoch": 5.99, + "learning_rate": 2.002857142857143e-05, + "loss": 1.2285, + "step": 1049 + }, + { + "epoch": 6.0, + "learning_rate": 2e-05, + "loss": 1.2588, + "step": 1050 + }, + { + "epoch": 6.01, + "learning_rate": 1.9971428571428572e-05, + "loss": 1.2846, + "step": 1051 + }, + { + "epoch": 6.01, + "learning_rate": 1.9942857142857142e-05, + "loss": 1.2334, + "step": 1052 + }, + { + "epoch": 6.02, + "learning_rate": 1.9914285714285716e-05, + "loss": 1.2025, + "step": 1053 + }, + { + "epoch": 6.02, + "learning_rate": 1.9885714285714286e-05, + "loss": 1.1503, + "step": 1054 + }, + { + "epoch": 6.03, + "learning_rate": 1.9857142857142856e-05, + "loss": 1.2338, + "step": 1055 + }, + { + "epoch": 6.03, + "learning_rate": 1.982857142857143e-05, + "loss": 1.2692, + "step": 1056 + }, + { + "epoch": 6.04, + "learning_rate": 1.9800000000000004e-05, + "loss": 1.218, + "step": 1057 + }, + { + "epoch": 6.05, + "learning_rate": 1.977142857142857e-05, + "loss": 1.1996, + "step": 1058 + }, + { + "epoch": 6.05, + "learning_rate": 1.9742857142857144e-05, + "loss": 1.1991, + "step": 1059 + }, + { + "epoch": 6.06, + "learning_rate": 1.9714285714285714e-05, + "loss": 1.2326, + "step": 1060 + }, + { + "epoch": 6.06, + "learning_rate": 1.9685714285714288e-05, + "loss": 1.2157, + "step": 1061 + }, + { + "epoch": 6.07, + "learning_rate": 1.9657142857142858e-05, + "loss": 1.2037, + "step": 1062 + }, + { + "epoch": 6.07, + "learning_rate": 1.962857142857143e-05, + "loss": 1.2286, + "step": 1063 + }, + { + "epoch": 6.08, + "learning_rate": 1.9600000000000002e-05, + "loss": 1.2453, + "step": 1064 + }, + { + "epoch": 6.09, + "learning_rate": 1.9571428571428572e-05, + "loss": 1.1621, + "step": 1065 + }, + { + "epoch": 6.09, + "learning_rate": 1.9542857142857143e-05, + "loss": 1.2732, + "step": 1066 + }, + { + "epoch": 6.1, + "learning_rate": 1.9514285714285716e-05, + "loss": 1.1938, + "step": 1067 + }, + { + "epoch": 6.1, + "learning_rate": 1.9485714285714286e-05, + "loss": 1.2676, + "step": 1068 + }, + { + "epoch": 6.11, + "learning_rate": 1.9457142857142857e-05, + "loss": 1.219, + "step": 1069 + }, + { + "epoch": 6.11, + "learning_rate": 1.942857142857143e-05, + "loss": 1.2599, + "step": 1070 + }, + { + "epoch": 6.12, + "learning_rate": 1.94e-05, + "loss": 1.1898, + "step": 1071 + }, + { + "epoch": 6.13, + "learning_rate": 1.9371428571428574e-05, + "loss": 1.2677, + "step": 1072 + }, + { + "epoch": 6.13, + "learning_rate": 1.9342857142857144e-05, + "loss": 1.1982, + "step": 1073 + }, + { + "epoch": 6.14, + "learning_rate": 1.9314285714285715e-05, + "loss": 1.2091, + "step": 1074 + }, + { + "epoch": 6.14, + "learning_rate": 1.928571428571429e-05, + "loss": 1.2033, + "step": 1075 + }, + { + "epoch": 6.15, + "learning_rate": 1.9257142857142855e-05, + "loss": 1.2865, + "step": 1076 + }, + { + "epoch": 6.15, + "learning_rate": 1.922857142857143e-05, + "loss": 1.2346, + "step": 1077 + }, + { + "epoch": 6.16, + "learning_rate": 1.9200000000000003e-05, + "loss": 1.2273, + "step": 1078 + }, + { + "epoch": 6.17, + "learning_rate": 1.9171428571428573e-05, + "loss": 1.1595, + "step": 1079 + }, + { + "epoch": 6.17, + "learning_rate": 1.9142857142857143e-05, + "loss": 1.252, + "step": 1080 + }, + { + "epoch": 6.18, + "learning_rate": 1.9114285714285717e-05, + "loss": 1.2, + "step": 1081 + }, + { + "epoch": 6.18, + "learning_rate": 1.9085714285714287e-05, + "loss": 1.1577, + "step": 1082 + }, + { + "epoch": 6.19, + "learning_rate": 1.9057142857142857e-05, + "loss": 1.2526, + "step": 1083 + }, + { + "epoch": 6.19, + "learning_rate": 1.9028571428571427e-05, + "loss": 1.2231, + "step": 1084 + }, + { + "epoch": 6.2, + "learning_rate": 1.9e-05, + "loss": 1.2639, + "step": 1085 + }, + { + "epoch": 6.21, + "learning_rate": 1.8971428571428575e-05, + "loss": 1.4006, + "step": 1086 + }, + { + "epoch": 6.21, + "learning_rate": 1.894285714285714e-05, + "loss": 1.2843, + "step": 1087 + }, + { + "epoch": 6.22, + "learning_rate": 1.8914285714285715e-05, + "loss": 1.3375, + "step": 1088 + }, + { + "epoch": 6.22, + "learning_rate": 1.888571428571429e-05, + "loss": 1.2015, + "step": 1089 + }, + { + "epoch": 6.23, + "learning_rate": 1.885714285714286e-05, + "loss": 1.2693, + "step": 1090 + }, + { + "epoch": 6.23, + "learning_rate": 1.882857142857143e-05, + "loss": 1.2676, + "step": 1091 + }, + { + "epoch": 6.24, + "learning_rate": 1.88e-05, + "loss": 1.2354, + "step": 1092 + }, + { + "epoch": 6.25, + "learning_rate": 1.8771428571428573e-05, + "loss": 1.2377, + "step": 1093 + }, + { + "epoch": 6.25, + "learning_rate": 1.8742857142857143e-05, + "loss": 1.2322, + "step": 1094 + }, + { + "epoch": 6.26, + "learning_rate": 1.8714285714285714e-05, + "loss": 1.3219, + "step": 1095 + }, + { + "epoch": 6.26, + "learning_rate": 1.8685714285714287e-05, + "loss": 1.2194, + "step": 1096 + }, + { + "epoch": 6.27, + "learning_rate": 1.8657142857142858e-05, + "loss": 1.2076, + "step": 1097 + }, + { + "epoch": 6.27, + "learning_rate": 1.8628571428571428e-05, + "loss": 1.1731, + "step": 1098 + }, + { + "epoch": 6.28, + "learning_rate": 1.86e-05, + "loss": 1.1856, + "step": 1099 + }, + { + "epoch": 6.29, + "learning_rate": 1.8571428571428572e-05, + "loss": 1.2499, + "step": 1100 + }, + { + "epoch": 6.29, + "learning_rate": 1.8542857142857142e-05, + "loss": 1.2047, + "step": 1101 + }, + { + "epoch": 6.3, + "learning_rate": 1.8514285714285716e-05, + "loss": 1.2324, + "step": 1102 + }, + { + "epoch": 6.3, + "learning_rate": 1.8485714285714286e-05, + "loss": 1.2061, + "step": 1103 + }, + { + "epoch": 6.31, + "learning_rate": 1.845714285714286e-05, + "loss": 1.2276, + "step": 1104 + }, + { + "epoch": 6.31, + "learning_rate": 1.842857142857143e-05, + "loss": 1.2223, + "step": 1105 + }, + { + "epoch": 6.32, + "learning_rate": 1.84e-05, + "loss": 1.2657, + "step": 1106 + }, + { + "epoch": 6.33, + "learning_rate": 1.8371428571428574e-05, + "loss": 1.2827, + "step": 1107 + }, + { + "epoch": 6.33, + "learning_rate": 1.8342857142857144e-05, + "loss": 1.3136, + "step": 1108 + }, + { + "epoch": 6.34, + "learning_rate": 1.8314285714285714e-05, + "loss": 1.2106, + "step": 1109 + }, + { + "epoch": 6.34, + "learning_rate": 1.8285714285714288e-05, + "loss": 1.1694, + "step": 1110 + }, + { + "epoch": 6.35, + "learning_rate": 1.8257142857142858e-05, + "loss": 1.1735, + "step": 1111 + }, + { + "epoch": 6.35, + "learning_rate": 1.8228571428571428e-05, + "loss": 1.2469, + "step": 1112 + }, + { + "epoch": 6.36, + "learning_rate": 1.8200000000000002e-05, + "loss": 1.275, + "step": 1113 + }, + { + "epoch": 6.37, + "learning_rate": 1.8171428571428572e-05, + "loss": 1.3411, + "step": 1114 + }, + { + "epoch": 6.37, + "learning_rate": 1.8142857142857146e-05, + "loss": 1.1947, + "step": 1115 + }, + { + "epoch": 6.38, + "learning_rate": 1.8114285714285713e-05, + "loss": 1.2564, + "step": 1116 + }, + { + "epoch": 6.38, + "learning_rate": 1.8085714285714286e-05, + "loss": 1.2547, + "step": 1117 + }, + { + "epoch": 6.39, + "learning_rate": 1.805714285714286e-05, + "loss": 1.2101, + "step": 1118 + }, + { + "epoch": 6.39, + "learning_rate": 1.802857142857143e-05, + "loss": 1.2641, + "step": 1119 + }, + { + "epoch": 6.4, + "learning_rate": 1.8e-05, + "loss": 1.3164, + "step": 1120 + }, + { + "epoch": 6.41, + "learning_rate": 1.797142857142857e-05, + "loss": 1.1706, + "step": 1121 + }, + { + "epoch": 6.41, + "learning_rate": 1.7942857142857144e-05, + "loss": 1.1826, + "step": 1122 + }, + { + "epoch": 6.42, + "learning_rate": 1.7914285714285715e-05, + "loss": 1.268, + "step": 1123 + }, + { + "epoch": 6.42, + "learning_rate": 1.7885714285714285e-05, + "loss": 1.2823, + "step": 1124 + }, + { + "epoch": 6.43, + "learning_rate": 1.785714285714286e-05, + "loss": 1.2254, + "step": 1125 + }, + { + "epoch": 6.43, + "learning_rate": 1.7828571428571432e-05, + "loss": 1.3097, + "step": 1126 + }, + { + "epoch": 6.44, + "learning_rate": 1.78e-05, + "loss": 1.2124, + "step": 1127 + }, + { + "epoch": 6.45, + "learning_rate": 1.7771428571428573e-05, + "loss": 1.1683, + "step": 1128 + }, + { + "epoch": 6.45, + "learning_rate": 1.7742857142857143e-05, + "loss": 1.1925, + "step": 1129 + }, + { + "epoch": 6.46, + "learning_rate": 1.7714285714285713e-05, + "loss": 1.2316, + "step": 1130 + }, + { + "epoch": 6.46, + "learning_rate": 1.7685714285714287e-05, + "loss": 1.314, + "step": 1131 + }, + { + "epoch": 6.47, + "learning_rate": 1.7657142857142857e-05, + "loss": 1.3132, + "step": 1132 + }, + { + "epoch": 6.47, + "learning_rate": 1.762857142857143e-05, + "loss": 1.2391, + "step": 1133 + }, + { + "epoch": 6.48, + "learning_rate": 1.76e-05, + "loss": 1.2648, + "step": 1134 + }, + { + "epoch": 6.49, + "learning_rate": 1.757142857142857e-05, + "loss": 1.2307, + "step": 1135 + }, + { + "epoch": 6.49, + "learning_rate": 1.7542857142857145e-05, + "loss": 1.3283, + "step": 1136 + }, + { + "epoch": 6.5, + "learning_rate": 1.7514285714285715e-05, + "loss": 1.1606, + "step": 1137 + }, + { + "epoch": 6.5, + "learning_rate": 1.7485714285714285e-05, + "loss": 1.1954, + "step": 1138 + }, + { + "epoch": 6.51, + "learning_rate": 1.745714285714286e-05, + "loss": 1.2318, + "step": 1139 + }, + { + "epoch": 6.51, + "learning_rate": 1.742857142857143e-05, + "loss": 1.231, + "step": 1140 + }, + { + "epoch": 6.52, + "learning_rate": 1.74e-05, + "loss": 1.1751, + "step": 1141 + }, + { + "epoch": 6.53, + "learning_rate": 1.7371428571428573e-05, + "loss": 1.2806, + "step": 1142 + }, + { + "epoch": 6.53, + "learning_rate": 1.7342857142857143e-05, + "loss": 1.1632, + "step": 1143 + }, + { + "epoch": 6.54, + "learning_rate": 1.7314285714285717e-05, + "loss": 1.1881, + "step": 1144 + }, + { + "epoch": 6.54, + "learning_rate": 1.7285714285714287e-05, + "loss": 1.189, + "step": 1145 + }, + { + "epoch": 6.55, + "learning_rate": 1.7257142857142857e-05, + "loss": 1.2705, + "step": 1146 + }, + { + "epoch": 6.55, + "learning_rate": 1.722857142857143e-05, + "loss": 1.2645, + "step": 1147 + }, + { + "epoch": 6.56, + "learning_rate": 1.7199999999999998e-05, + "loss": 1.2979, + "step": 1148 + }, + { + "epoch": 6.57, + "learning_rate": 1.717142857142857e-05, + "loss": 1.2624, + "step": 1149 + }, + { + "epoch": 6.57, + "learning_rate": 1.7142857142857145e-05, + "loss": 1.2362, + "step": 1150 + }, + { + "epoch": 6.58, + "learning_rate": 1.7114285714285715e-05, + "loss": 1.2824, + "step": 1151 + }, + { + "epoch": 6.58, + "learning_rate": 1.7085714285714286e-05, + "loss": 1.2423, + "step": 1152 + }, + { + "epoch": 6.59, + "learning_rate": 1.7057142857142856e-05, + "loss": 1.1784, + "step": 1153 + }, + { + "epoch": 6.59, + "learning_rate": 1.702857142857143e-05, + "loss": 1.2816, + "step": 1154 + }, + { + "epoch": 6.6, + "learning_rate": 1.7000000000000003e-05, + "loss": 1.2822, + "step": 1155 + }, + { + "epoch": 6.61, + "learning_rate": 1.697142857142857e-05, + "loss": 1.1859, + "step": 1156 + }, + { + "epoch": 6.61, + "learning_rate": 1.6942857142857144e-05, + "loss": 1.1932, + "step": 1157 + }, + { + "epoch": 6.62, + "learning_rate": 1.6914285714285717e-05, + "loss": 1.2268, + "step": 1158 + }, + { + "epoch": 6.62, + "learning_rate": 1.6885714285714284e-05, + "loss": 1.2129, + "step": 1159 + }, + { + "epoch": 6.63, + "learning_rate": 1.6857142857142858e-05, + "loss": 1.2325, + "step": 1160 + }, + { + "epoch": 6.63, + "learning_rate": 1.6828571428571428e-05, + "loss": 1.3025, + "step": 1161 + }, + { + "epoch": 6.64, + "learning_rate": 1.6800000000000002e-05, + "loss": 1.2879, + "step": 1162 + }, + { + "epoch": 6.65, + "learning_rate": 1.6771428571428572e-05, + "loss": 1.2264, + "step": 1163 + }, + { + "epoch": 6.65, + "learning_rate": 1.6742857142857142e-05, + "loss": 1.2469, + "step": 1164 + }, + { + "epoch": 6.66, + "learning_rate": 1.6714285714285716e-05, + "loss": 1.2385, + "step": 1165 + }, + { + "epoch": 6.66, + "learning_rate": 1.6685714285714286e-05, + "loss": 1.2549, + "step": 1166 + }, + { + "epoch": 6.67, + "learning_rate": 1.6657142857142856e-05, + "loss": 1.2005, + "step": 1167 + }, + { + "epoch": 6.67, + "learning_rate": 1.662857142857143e-05, + "loss": 1.2035, + "step": 1168 + }, + { + "epoch": 6.68, + "learning_rate": 1.66e-05, + "loss": 1.2198, + "step": 1169 + }, + { + "epoch": 6.69, + "learning_rate": 1.657142857142857e-05, + "loss": 1.2112, + "step": 1170 + }, + { + "epoch": 6.69, + "learning_rate": 1.6542857142857144e-05, + "loss": 1.2061, + "step": 1171 + }, + { + "epoch": 6.7, + "learning_rate": 1.6514285714285714e-05, + "loss": 1.2431, + "step": 1172 + }, + { + "epoch": 6.7, + "learning_rate": 1.6485714285714288e-05, + "loss": 1.2181, + "step": 1173 + }, + { + "epoch": 6.71, + "learning_rate": 1.645714285714286e-05, + "loss": 1.296, + "step": 1174 + }, + { + "epoch": 6.71, + "learning_rate": 1.642857142857143e-05, + "loss": 1.2314, + "step": 1175 + }, + { + "epoch": 6.72, + "learning_rate": 1.6400000000000002e-05, + "loss": 1.2493, + "step": 1176 + }, + { + "epoch": 6.73, + "learning_rate": 1.6371428571428572e-05, + "loss": 1.1707, + "step": 1177 + }, + { + "epoch": 6.73, + "learning_rate": 1.6342857142857143e-05, + "loss": 1.2368, + "step": 1178 + }, + { + "epoch": 6.74, + "learning_rate": 1.6314285714285716e-05, + "loss": 1.2585, + "step": 1179 + }, + { + "epoch": 6.74, + "learning_rate": 1.6285714285714287e-05, + "loss": 1.1886, + "step": 1180 + }, + { + "epoch": 6.75, + "learning_rate": 1.6257142857142857e-05, + "loss": 1.2597, + "step": 1181 + }, + { + "epoch": 6.75, + "learning_rate": 1.622857142857143e-05, + "loss": 1.1979, + "step": 1182 + }, + { + "epoch": 6.76, + "learning_rate": 1.62e-05, + "loss": 1.2213, + "step": 1183 + }, + { + "epoch": 6.77, + "learning_rate": 1.6171428571428574e-05, + "loss": 1.2825, + "step": 1184 + }, + { + "epoch": 6.77, + "learning_rate": 1.614285714285714e-05, + "loss": 1.2104, + "step": 1185 + }, + { + "epoch": 6.78, + "learning_rate": 1.6114285714285715e-05, + "loss": 1.2539, + "step": 1186 + }, + { + "epoch": 6.78, + "learning_rate": 1.608571428571429e-05, + "loss": 1.2486, + "step": 1187 + }, + { + "epoch": 6.79, + "learning_rate": 1.6057142857142855e-05, + "loss": 1.1958, + "step": 1188 + }, + { + "epoch": 6.79, + "learning_rate": 1.602857142857143e-05, + "loss": 1.2434, + "step": 1189 + }, + { + "epoch": 6.8, + "learning_rate": 1.6000000000000003e-05, + "loss": 1.2585, + "step": 1190 + }, + { + "epoch": 6.81, + "learning_rate": 1.5971428571428573e-05, + "loss": 1.1735, + "step": 1191 + }, + { + "epoch": 6.81, + "learning_rate": 1.5942857142857143e-05, + "loss": 1.19, + "step": 1192 + }, + { + "epoch": 6.82, + "learning_rate": 1.5914285714285713e-05, + "loss": 1.2522, + "step": 1193 + }, + { + "epoch": 6.82, + "learning_rate": 1.5885714285714287e-05, + "loss": 1.2143, + "step": 1194 + }, + { + "epoch": 6.83, + "learning_rate": 1.5857142857142857e-05, + "loss": 1.1572, + "step": 1195 + }, + { + "epoch": 6.83, + "learning_rate": 1.5828571428571428e-05, + "loss": 1.1969, + "step": 1196 + }, + { + "epoch": 6.84, + "learning_rate": 1.58e-05, + "loss": 1.1789, + "step": 1197 + }, + { + "epoch": 6.85, + "learning_rate": 1.577142857142857e-05, + "loss": 1.2163, + "step": 1198 + }, + { + "epoch": 6.85, + "learning_rate": 1.574285714285714e-05, + "loss": 1.2201, + "step": 1199 + }, + { + "epoch": 6.86, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.2394, + "step": 1200 + }, + { + "epoch": 6.86, + "learning_rate": 1.5685714285714286e-05, + "loss": 1.1889, + "step": 1201 + }, + { + "epoch": 6.87, + "learning_rate": 1.565714285714286e-05, + "loss": 1.2453, + "step": 1202 + }, + { + "epoch": 6.87, + "learning_rate": 1.562857142857143e-05, + "loss": 1.2449, + "step": 1203 + }, + { + "epoch": 6.88, + "learning_rate": 1.56e-05, + "loss": 1.2958, + "step": 1204 + }, + { + "epoch": 6.89, + "learning_rate": 1.5571428571428573e-05, + "loss": 1.1921, + "step": 1205 + }, + { + "epoch": 6.89, + "learning_rate": 1.5542857142857144e-05, + "loss": 1.1871, + "step": 1206 + }, + { + "epoch": 6.9, + "learning_rate": 1.5514285714285714e-05, + "loss": 1.2974, + "step": 1207 + }, + { + "epoch": 6.9, + "learning_rate": 1.5485714285714287e-05, + "loss": 1.1678, + "step": 1208 + }, + { + "epoch": 6.91, + "learning_rate": 1.5457142857142858e-05, + "loss": 1.3504, + "step": 1209 + }, + { + "epoch": 6.91, + "learning_rate": 1.5428571428571428e-05, + "loss": 1.2325, + "step": 1210 + }, + { + "epoch": 6.92, + "learning_rate": 1.54e-05, + "loss": 1.1954, + "step": 1211 + }, + { + "epoch": 6.93, + "learning_rate": 1.5371428571428572e-05, + "loss": 1.2511, + "step": 1212 + }, + { + "epoch": 6.93, + "learning_rate": 1.5342857142857146e-05, + "loss": 1.2351, + "step": 1213 + }, + { + "epoch": 6.94, + "learning_rate": 1.5314285714285716e-05, + "loss": 1.2587, + "step": 1214 + }, + { + "epoch": 6.94, + "learning_rate": 1.5285714285714286e-05, + "loss": 1.2194, + "step": 1215 + }, + { + "epoch": 6.95, + "learning_rate": 1.5257142857142858e-05, + "loss": 1.1658, + "step": 1216 + }, + { + "epoch": 6.95, + "learning_rate": 1.5228571428571428e-05, + "loss": 1.187, + "step": 1217 + }, + { + "epoch": 6.96, + "learning_rate": 1.52e-05, + "loss": 1.197, + "step": 1218 + }, + { + "epoch": 6.97, + "learning_rate": 1.5171428571428572e-05, + "loss": 1.2339, + "step": 1219 + }, + { + "epoch": 6.97, + "learning_rate": 1.5142857142857144e-05, + "loss": 1.2621, + "step": 1220 + }, + { + "epoch": 6.98, + "learning_rate": 1.5114285714285714e-05, + "loss": 1.2397, + "step": 1221 + }, + { + "epoch": 6.98, + "learning_rate": 1.5085714285714286e-05, + "loss": 1.2295, + "step": 1222 + }, + { + "epoch": 6.99, + "learning_rate": 1.5057142857142858e-05, + "loss": 1.2575, + "step": 1223 + }, + { + "epoch": 6.99, + "learning_rate": 1.5028571428571428e-05, + "loss": 1.1525, + "step": 1224 + }, + { + "epoch": 7.0, + "learning_rate": 1.5e-05, + "loss": 1.2887, + "step": 1225 + }, + { + "epoch": 7.01, + "learning_rate": 1.4971428571428572e-05, + "loss": 1.2473, + "step": 1226 + }, + { + "epoch": 7.01, + "learning_rate": 1.4942857142857144e-05, + "loss": 1.1196, + "step": 1227 + }, + { + "epoch": 7.02, + "learning_rate": 1.4914285714285715e-05, + "loss": 1.2251, + "step": 1228 + }, + { + "epoch": 7.02, + "learning_rate": 1.4885714285714286e-05, + "loss": 1.1242, + "step": 1229 + }, + { + "epoch": 7.03, + "learning_rate": 1.4857142857142858e-05, + "loss": 1.1919, + "step": 1230 + }, + { + "epoch": 7.03, + "learning_rate": 1.482857142857143e-05, + "loss": 1.1957, + "step": 1231 + }, + { + "epoch": 7.04, + "learning_rate": 1.48e-05, + "loss": 1.2005, + "step": 1232 + }, + { + "epoch": 7.05, + "learning_rate": 1.4771428571428573e-05, + "loss": 1.179, + "step": 1233 + }, + { + "epoch": 7.05, + "learning_rate": 1.4742857142857144e-05, + "loss": 1.2359, + "step": 1234 + }, + { + "epoch": 7.06, + "learning_rate": 1.4714285714285713e-05, + "loss": 1.259, + "step": 1235 + }, + { + "epoch": 7.06, + "learning_rate": 1.4685714285714287e-05, + "loss": 1.2717, + "step": 1236 + }, + { + "epoch": 7.07, + "learning_rate": 1.4657142857142859e-05, + "loss": 1.0816, + "step": 1237 + }, + { + "epoch": 7.07, + "learning_rate": 1.462857142857143e-05, + "loss": 1.2102, + "step": 1238 + }, + { + "epoch": 7.08, + "learning_rate": 1.4599999999999999e-05, + "loss": 1.181, + "step": 1239 + }, + { + "epoch": 7.09, + "learning_rate": 1.4571428571428573e-05, + "loss": 1.2051, + "step": 1240 + }, + { + "epoch": 7.09, + "learning_rate": 1.4542857142857145e-05, + "loss": 1.1448, + "step": 1241 + }, + { + "epoch": 7.1, + "learning_rate": 1.4514285714285713e-05, + "loss": 1.1301, + "step": 1242 + }, + { + "epoch": 7.1, + "learning_rate": 1.4485714285714285e-05, + "loss": 1.2258, + "step": 1243 + }, + { + "epoch": 7.11, + "learning_rate": 1.4457142857142857e-05, + "loss": 1.23, + "step": 1244 + }, + { + "epoch": 7.11, + "learning_rate": 1.442857142857143e-05, + "loss": 1.1806, + "step": 1245 + }, + { + "epoch": 7.12, + "learning_rate": 1.44e-05, + "loss": 1.2021, + "step": 1246 + }, + { + "epoch": 7.13, + "learning_rate": 1.4371428571428571e-05, + "loss": 1.268, + "step": 1247 + }, + { + "epoch": 7.13, + "learning_rate": 1.4342857142857143e-05, + "loss": 1.238, + "step": 1248 + }, + { + "epoch": 7.14, + "learning_rate": 1.4314285714285717e-05, + "loss": 1.1767, + "step": 1249 + }, + { + "epoch": 7.14, + "learning_rate": 1.4285714285714285e-05, + "loss": 1.2127, + "step": 1250 + }, + { + "epoch": 7.15, + "learning_rate": 1.4257142857142857e-05, + "loss": 1.2837, + "step": 1251 + }, + { + "epoch": 7.15, + "learning_rate": 1.422857142857143e-05, + "loss": 1.1183, + "step": 1252 + }, + { + "epoch": 7.16, + "learning_rate": 1.42e-05, + "loss": 1.161, + "step": 1253 + }, + { + "epoch": 7.17, + "learning_rate": 1.4171428571428572e-05, + "loss": 1.1957, + "step": 1254 + }, + { + "epoch": 7.17, + "learning_rate": 1.4142857142857143e-05, + "loss": 1.1916, + "step": 1255 + }, + { + "epoch": 7.18, + "learning_rate": 1.4114285714285715e-05, + "loss": 1.1816, + "step": 1256 + }, + { + "epoch": 7.18, + "learning_rate": 1.4085714285714286e-05, + "loss": 1.2454, + "step": 1257 + }, + { + "epoch": 7.19, + "learning_rate": 1.4057142857142858e-05, + "loss": 1.2272, + "step": 1258 + }, + { + "epoch": 7.19, + "learning_rate": 1.402857142857143e-05, + "loss": 1.1294, + "step": 1259 + }, + { + "epoch": 7.2, + "learning_rate": 1.4000000000000001e-05, + "loss": 1.2474, + "step": 1260 + }, + { + "epoch": 7.21, + "learning_rate": 1.3971428571428572e-05, + "loss": 1.1345, + "step": 1261 + }, + { + "epoch": 7.21, + "learning_rate": 1.3942857142857144e-05, + "loss": 1.1249, + "step": 1262 + }, + { + "epoch": 7.22, + "learning_rate": 1.3914285714285716e-05, + "loss": 1.1739, + "step": 1263 + }, + { + "epoch": 7.22, + "learning_rate": 1.3885714285714286e-05, + "loss": 1.2192, + "step": 1264 + }, + { + "epoch": 7.23, + "learning_rate": 1.3857142857142858e-05, + "loss": 1.2375, + "step": 1265 + }, + { + "epoch": 7.23, + "learning_rate": 1.382857142857143e-05, + "loss": 1.1861, + "step": 1266 + }, + { + "epoch": 7.24, + "learning_rate": 1.3800000000000002e-05, + "loss": 1.1637, + "step": 1267 + }, + { + "epoch": 7.25, + "learning_rate": 1.3771428571428572e-05, + "loss": 1.1864, + "step": 1268 + }, + { + "epoch": 7.25, + "learning_rate": 1.3742857142857144e-05, + "loss": 1.2389, + "step": 1269 + }, + { + "epoch": 7.26, + "learning_rate": 1.3714285714285716e-05, + "loss": 1.1863, + "step": 1270 + }, + { + "epoch": 7.26, + "learning_rate": 1.3685714285714284e-05, + "loss": 1.1982, + "step": 1271 + }, + { + "epoch": 7.27, + "learning_rate": 1.3657142857142858e-05, + "loss": 1.2099, + "step": 1272 + }, + { + "epoch": 7.27, + "learning_rate": 1.362857142857143e-05, + "loss": 1.2451, + "step": 1273 + }, + { + "epoch": 7.28, + "learning_rate": 1.3600000000000002e-05, + "loss": 1.2528, + "step": 1274 + }, + { + "epoch": 7.29, + "learning_rate": 1.357142857142857e-05, + "loss": 1.1299, + "step": 1275 + }, + { + "epoch": 7.29, + "learning_rate": 1.3542857142857142e-05, + "loss": 1.1795, + "step": 1276 + }, + { + "epoch": 7.3, + "learning_rate": 1.3514285714285716e-05, + "loss": 1.2197, + "step": 1277 + }, + { + "epoch": 7.3, + "learning_rate": 1.3485714285714288e-05, + "loss": 1.1641, + "step": 1278 + }, + { + "epoch": 7.31, + "learning_rate": 1.3457142857142857e-05, + "loss": 1.1615, + "step": 1279 + }, + { + "epoch": 7.31, + "learning_rate": 1.3428571428571429e-05, + "loss": 1.2236, + "step": 1280 + }, + { + "epoch": 7.32, + "learning_rate": 1.3400000000000002e-05, + "loss": 1.1911, + "step": 1281 + }, + { + "epoch": 7.33, + "learning_rate": 1.337142857142857e-05, + "loss": 1.2689, + "step": 1282 + }, + { + "epoch": 7.33, + "learning_rate": 1.3342857142857143e-05, + "loss": 1.1873, + "step": 1283 + }, + { + "epoch": 7.34, + "learning_rate": 1.3314285714285715e-05, + "loss": 1.1661, + "step": 1284 + }, + { + "epoch": 7.34, + "learning_rate": 1.3285714285714288e-05, + "loss": 1.1758, + "step": 1285 + }, + { + "epoch": 7.35, + "learning_rate": 1.3257142857142857e-05, + "loss": 1.2637, + "step": 1286 + }, + { + "epoch": 7.35, + "learning_rate": 1.3228571428571429e-05, + "loss": 1.2458, + "step": 1287 + }, + { + "epoch": 7.36, + "learning_rate": 1.32e-05, + "loss": 1.2376, + "step": 1288 + }, + { + "epoch": 7.37, + "learning_rate": 1.3171428571428571e-05, + "loss": 1.22, + "step": 1289 + }, + { + "epoch": 7.37, + "learning_rate": 1.3142857142857143e-05, + "loss": 1.1867, + "step": 1290 + }, + { + "epoch": 7.38, + "learning_rate": 1.3114285714285715e-05, + "loss": 1.1729, + "step": 1291 + }, + { + "epoch": 7.38, + "learning_rate": 1.3085714285714287e-05, + "loss": 1.2337, + "step": 1292 + }, + { + "epoch": 7.39, + "learning_rate": 1.3057142857142857e-05, + "loss": 1.1378, + "step": 1293 + }, + { + "epoch": 7.39, + "learning_rate": 1.3028571428571429e-05, + "loss": 1.2629, + "step": 1294 + }, + { + "epoch": 7.4, + "learning_rate": 1.3000000000000001e-05, + "loss": 1.1673, + "step": 1295 + }, + { + "epoch": 7.41, + "learning_rate": 1.2971428571428573e-05, + "loss": 1.2138, + "step": 1296 + }, + { + "epoch": 7.41, + "learning_rate": 1.2942857142857143e-05, + "loss": 1.1845, + "step": 1297 + }, + { + "epoch": 7.42, + "learning_rate": 1.2914285714285715e-05, + "loss": 1.1703, + "step": 1298 + }, + { + "epoch": 7.42, + "learning_rate": 1.2885714285714287e-05, + "loss": 1.2146, + "step": 1299 + }, + { + "epoch": 7.43, + "learning_rate": 1.2857142857142857e-05, + "loss": 1.2226, + "step": 1300 + }, + { + "epoch": 7.43, + "learning_rate": 1.282857142857143e-05, + "loss": 1.2902, + "step": 1301 + }, + { + "epoch": 7.44, + "learning_rate": 1.2800000000000001e-05, + "loss": 1.2114, + "step": 1302 + }, + { + "epoch": 7.45, + "learning_rate": 1.2771428571428573e-05, + "loss": 1.1652, + "step": 1303 + }, + { + "epoch": 7.45, + "learning_rate": 1.2742857142857143e-05, + "loss": 1.1793, + "step": 1304 + }, + { + "epoch": 7.46, + "learning_rate": 1.2714285714285715e-05, + "loss": 1.2063, + "step": 1305 + }, + { + "epoch": 7.46, + "learning_rate": 1.2685714285714287e-05, + "loss": 1.248, + "step": 1306 + }, + { + "epoch": 7.47, + "learning_rate": 1.2657142857142859e-05, + "loss": 1.2503, + "step": 1307 + }, + { + "epoch": 7.47, + "learning_rate": 1.2628571428571428e-05, + "loss": 1.2242, + "step": 1308 + }, + { + "epoch": 7.48, + "learning_rate": 1.2600000000000001e-05, + "loss": 1.1112, + "step": 1309 + }, + { + "epoch": 7.49, + "learning_rate": 1.2571428571428573e-05, + "loss": 1.1516, + "step": 1310 + }, + { + "epoch": 7.49, + "learning_rate": 1.2542857142857142e-05, + "loss": 1.2051, + "step": 1311 + }, + { + "epoch": 7.5, + "learning_rate": 1.2514285714285714e-05, + "loss": 1.1921, + "step": 1312 + }, + { + "epoch": 7.5, + "learning_rate": 1.2485714285714287e-05, + "loss": 1.1793, + "step": 1313 + }, + { + "epoch": 7.51, + "learning_rate": 1.2457142857142858e-05, + "loss": 1.2241, + "step": 1314 + }, + { + "epoch": 7.51, + "learning_rate": 1.242857142857143e-05, + "loss": 1.2178, + "step": 1315 + }, + { + "epoch": 7.52, + "learning_rate": 1.24e-05, + "loss": 1.2603, + "step": 1316 + }, + { + "epoch": 7.53, + "learning_rate": 1.2371428571428574e-05, + "loss": 1.2178, + "step": 1317 + }, + { + "epoch": 7.53, + "learning_rate": 1.2342857142857144e-05, + "loss": 1.2263, + "step": 1318 + }, + { + "epoch": 7.54, + "learning_rate": 1.2314285714285714e-05, + "loss": 1.1786, + "step": 1319 + }, + { + "epoch": 7.54, + "learning_rate": 1.2285714285714286e-05, + "loss": 1.2788, + "step": 1320 + }, + { + "epoch": 7.55, + "learning_rate": 1.2257142857142858e-05, + "loss": 1.1498, + "step": 1321 + }, + { + "epoch": 7.55, + "learning_rate": 1.222857142857143e-05, + "loss": 1.2405, + "step": 1322 + }, + { + "epoch": 7.56, + "learning_rate": 1.22e-05, + "loss": 1.164, + "step": 1323 + }, + { + "epoch": 7.57, + "learning_rate": 1.2171428571428572e-05, + "loss": 1.2008, + "step": 1324 + }, + { + "epoch": 7.57, + "learning_rate": 1.2142857142857144e-05, + "loss": 1.2171, + "step": 1325 + }, + { + "epoch": 7.58, + "learning_rate": 1.2114285714285716e-05, + "loss": 1.2831, + "step": 1326 + }, + { + "epoch": 7.58, + "learning_rate": 1.2085714285714286e-05, + "loss": 1.1482, + "step": 1327 + }, + { + "epoch": 7.59, + "learning_rate": 1.2057142857142856e-05, + "loss": 1.1889, + "step": 1328 + }, + { + "epoch": 7.59, + "learning_rate": 1.202857142857143e-05, + "loss": 1.1867, + "step": 1329 + }, + { + "epoch": 7.6, + "learning_rate": 1.2e-05, + "loss": 1.1787, + "step": 1330 + }, + { + "epoch": 7.61, + "learning_rate": 1.1971428571428572e-05, + "loss": 1.2058, + "step": 1331 + }, + { + "epoch": 7.61, + "learning_rate": 1.1942857142857142e-05, + "loss": 1.0851, + "step": 1332 + }, + { + "epoch": 7.62, + "learning_rate": 1.1914285714285716e-05, + "loss": 1.1733, + "step": 1333 + }, + { + "epoch": 7.62, + "learning_rate": 1.1885714285714286e-05, + "loss": 1.198, + "step": 1334 + }, + { + "epoch": 7.63, + "learning_rate": 1.1857142857142858e-05, + "loss": 1.187, + "step": 1335 + }, + { + "epoch": 7.63, + "learning_rate": 1.1828571428571429e-05, + "loss": 1.2155, + "step": 1336 + }, + { + "epoch": 7.64, + "learning_rate": 1.18e-05, + "loss": 1.2444, + "step": 1337 + }, + { + "epoch": 7.65, + "learning_rate": 1.1771428571428572e-05, + "loss": 1.191, + "step": 1338 + }, + { + "epoch": 7.65, + "learning_rate": 1.1742857142857143e-05, + "loss": 1.1728, + "step": 1339 + }, + { + "epoch": 7.66, + "learning_rate": 1.1714285714285715e-05, + "loss": 1.1744, + "step": 1340 + }, + { + "epoch": 7.66, + "learning_rate": 1.1685714285714287e-05, + "loss": 1.2009, + "step": 1341 + }, + { + "epoch": 7.67, + "learning_rate": 1.1657142857142859e-05, + "loss": 1.2131, + "step": 1342 + }, + { + "epoch": 7.67, + "learning_rate": 1.1628571428571429e-05, + "loss": 1.2086, + "step": 1343 + }, + { + "epoch": 7.68, + "learning_rate": 1.16e-05, + "loss": 1.181, + "step": 1344 + }, + { + "epoch": 7.69, + "learning_rate": 1.1571428571428573e-05, + "loss": 1.1559, + "step": 1345 + }, + { + "epoch": 7.69, + "learning_rate": 1.1542857142857143e-05, + "loss": 1.2186, + "step": 1346 + }, + { + "epoch": 7.7, + "learning_rate": 1.1514285714285715e-05, + "loss": 1.0891, + "step": 1347 + }, + { + "epoch": 7.7, + "learning_rate": 1.1485714285714285e-05, + "loss": 1.2447, + "step": 1348 + }, + { + "epoch": 7.71, + "learning_rate": 1.1457142857142859e-05, + "loss": 1.1734, + "step": 1349 + }, + { + "epoch": 7.71, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.1485, + "step": 1350 + }, + { + "epoch": 7.72, + "learning_rate": 1.1400000000000001e-05, + "loss": 1.237, + "step": 1351 + }, + { + "epoch": 7.73, + "learning_rate": 1.1371428571428571e-05, + "loss": 1.2379, + "step": 1352 + }, + { + "epoch": 7.73, + "learning_rate": 1.1342857142857143e-05, + "loss": 1.232, + "step": 1353 + }, + { + "epoch": 7.74, + "learning_rate": 1.1314285714285715e-05, + "loss": 1.2107, + "step": 1354 + }, + { + "epoch": 7.74, + "learning_rate": 1.1285714285714285e-05, + "loss": 1.2678, + "step": 1355 + }, + { + "epoch": 7.75, + "learning_rate": 1.1257142857142857e-05, + "loss": 1.1586, + "step": 1356 + }, + { + "epoch": 7.75, + "learning_rate": 1.122857142857143e-05, + "loss": 1.1271, + "step": 1357 + }, + { + "epoch": 7.76, + "learning_rate": 1.1200000000000001e-05, + "loss": 1.2166, + "step": 1358 + }, + { + "epoch": 7.77, + "learning_rate": 1.1171428571428571e-05, + "loss": 1.1643, + "step": 1359 + }, + { + "epoch": 7.77, + "learning_rate": 1.1142857142857143e-05, + "loss": 1.2628, + "step": 1360 + }, + { + "epoch": 7.78, + "learning_rate": 1.1114285714285715e-05, + "loss": 1.2173, + "step": 1361 + }, + { + "epoch": 7.78, + "learning_rate": 1.1085714285714287e-05, + "loss": 1.1893, + "step": 1362 + }, + { + "epoch": 7.79, + "learning_rate": 1.1057142857142858e-05, + "loss": 1.2319, + "step": 1363 + }, + { + "epoch": 7.79, + "learning_rate": 1.102857142857143e-05, + "loss": 1.183, + "step": 1364 + }, + { + "epoch": 7.8, + "learning_rate": 1.1000000000000001e-05, + "loss": 1.2344, + "step": 1365 + }, + { + "epoch": 7.81, + "learning_rate": 1.0971428571428572e-05, + "loss": 1.1551, + "step": 1366 + }, + { + "epoch": 7.81, + "learning_rate": 1.0942857142857144e-05, + "loss": 1.1417, + "step": 1367 + }, + { + "epoch": 7.82, + "learning_rate": 1.0914285714285714e-05, + "loss": 1.232, + "step": 1368 + }, + { + "epoch": 7.82, + "learning_rate": 1.0885714285714286e-05, + "loss": 1.2065, + "step": 1369 + }, + { + "epoch": 7.83, + "learning_rate": 1.0857142857142858e-05, + "loss": 1.2284, + "step": 1370 + }, + { + "epoch": 7.83, + "learning_rate": 1.082857142857143e-05, + "loss": 1.1259, + "step": 1371 + }, + { + "epoch": 7.84, + "learning_rate": 1.08e-05, + "loss": 1.2002, + "step": 1372 + }, + { + "epoch": 7.85, + "learning_rate": 1.0771428571428572e-05, + "loss": 1.1922, + "step": 1373 + }, + { + "epoch": 7.85, + "learning_rate": 1.0742857142857144e-05, + "loss": 1.1551, + "step": 1374 + }, + { + "epoch": 7.86, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.2329, + "step": 1375 + }, + { + "epoch": 7.86, + "learning_rate": 1.0685714285714286e-05, + "loss": 1.2197, + "step": 1376 + }, + { + "epoch": 7.87, + "learning_rate": 1.0657142857142858e-05, + "loss": 1.1409, + "step": 1377 + }, + { + "epoch": 7.87, + "learning_rate": 1.062857142857143e-05, + "loss": 1.1619, + "step": 1378 + }, + { + "epoch": 7.88, + "learning_rate": 1.06e-05, + "loss": 1.1285, + "step": 1379 + }, + { + "epoch": 7.89, + "learning_rate": 1.0571428571428572e-05, + "loss": 1.2532, + "step": 1380 + }, + { + "epoch": 7.89, + "learning_rate": 1.0542857142857144e-05, + "loss": 1.218, + "step": 1381 + }, + { + "epoch": 7.9, + "learning_rate": 1.0514285714285716e-05, + "loss": 1.2115, + "step": 1382 + }, + { + "epoch": 7.9, + "learning_rate": 1.0485714285714286e-05, + "loss": 1.1195, + "step": 1383 + }, + { + "epoch": 7.91, + "learning_rate": 1.0457142857142856e-05, + "loss": 1.1154, + "step": 1384 + }, + { + "epoch": 7.91, + "learning_rate": 1.0428571428571428e-05, + "loss": 1.1892, + "step": 1385 + }, + { + "epoch": 7.92, + "learning_rate": 1.04e-05, + "loss": 1.222, + "step": 1386 + }, + { + "epoch": 7.93, + "learning_rate": 1.0371428571428572e-05, + "loss": 1.0729, + "step": 1387 + }, + { + "epoch": 7.93, + "learning_rate": 1.0342857142857143e-05, + "loss": 1.2224, + "step": 1388 + }, + { + "epoch": 7.94, + "learning_rate": 1.0314285714285715e-05, + "loss": 1.1833, + "step": 1389 + }, + { + "epoch": 7.94, + "learning_rate": 1.0285714285714286e-05, + "loss": 1.248, + "step": 1390 + }, + { + "epoch": 7.95, + "learning_rate": 1.0257142857142858e-05, + "loss": 1.1831, + "step": 1391 + }, + { + "epoch": 7.95, + "learning_rate": 1.0228571428571429e-05, + "loss": 1.1667, + "step": 1392 + }, + { + "epoch": 7.96, + "learning_rate": 1.02e-05, + "loss": 1.2026, + "step": 1393 + }, + { + "epoch": 7.97, + "learning_rate": 1.0171428571428573e-05, + "loss": 1.172, + "step": 1394 + }, + { + "epoch": 7.97, + "learning_rate": 1.0142857142857143e-05, + "loss": 1.1752, + "step": 1395 + }, + { + "epoch": 7.98, + "learning_rate": 1.0114285714285715e-05, + "loss": 1.1991, + "step": 1396 + }, + { + "epoch": 7.98, + "learning_rate": 1.0085714285714287e-05, + "loss": 1.1911, + "step": 1397 + }, + { + "epoch": 7.99, + "learning_rate": 1.0057142857142859e-05, + "loss": 1.1975, + "step": 1398 + }, + { + "epoch": 7.99, + "learning_rate": 1.0028571428571429e-05, + "loss": 1.2783, + "step": 1399 + }, + { + "epoch": 8.0, + "learning_rate": 1e-05, + "loss": 1.3125, + "step": 1400 + }, + { + "epoch": 8.01, + "learning_rate": 9.971428571428571e-06, + "loss": 1.2041, + "step": 1401 + }, + { + "epoch": 8.01, + "learning_rate": 9.942857142857143e-06, + "loss": 1.0592, + "step": 1402 + }, + { + "epoch": 8.02, + "learning_rate": 9.914285714285715e-06, + "loss": 1.1759, + "step": 1403 + }, + { + "epoch": 8.02, + "learning_rate": 9.885714285714285e-06, + "loss": 1.1795, + "step": 1404 + }, + { + "epoch": 8.03, + "learning_rate": 9.857142857142857e-06, + "loss": 1.1374, + "step": 1405 + }, + { + "epoch": 8.03, + "learning_rate": 9.828571428571429e-06, + "loss": 1.1302, + "step": 1406 + }, + { + "epoch": 8.04, + "learning_rate": 9.800000000000001e-06, + "loss": 1.1266, + "step": 1407 + }, + { + "epoch": 8.05, + "learning_rate": 9.771428571428571e-06, + "loss": 1.2986, + "step": 1408 + }, + { + "epoch": 8.05, + "learning_rate": 9.742857142857143e-06, + "loss": 1.1811, + "step": 1409 + }, + { + "epoch": 8.06, + "learning_rate": 9.714285714285715e-06, + "loss": 1.1399, + "step": 1410 + }, + { + "epoch": 8.06, + "learning_rate": 9.685714285714287e-06, + "loss": 1.1527, + "step": 1411 + }, + { + "epoch": 8.07, + "learning_rate": 9.657142857142857e-06, + "loss": 1.2195, + "step": 1412 + }, + { + "epoch": 8.07, + "learning_rate": 9.628571428571428e-06, + "loss": 1.1753, + "step": 1413 + }, + { + "epoch": 8.08, + "learning_rate": 9.600000000000001e-06, + "loss": 1.1934, + "step": 1414 + }, + { + "epoch": 8.09, + "learning_rate": 9.571428571428572e-06, + "loss": 1.2099, + "step": 1415 + }, + { + "epoch": 8.09, + "learning_rate": 9.542857142857143e-06, + "loss": 1.2931, + "step": 1416 + }, + { + "epoch": 8.1, + "learning_rate": 9.514285714285714e-06, + "loss": 1.1303, + "step": 1417 + }, + { + "epoch": 8.1, + "learning_rate": 9.485714285714287e-06, + "loss": 1.173, + "step": 1418 + }, + { + "epoch": 8.11, + "learning_rate": 9.457142857142858e-06, + "loss": 1.1857, + "step": 1419 + }, + { + "epoch": 8.11, + "learning_rate": 9.42857142857143e-06, + "loss": 1.2425, + "step": 1420 + }, + { + "epoch": 8.12, + "learning_rate": 9.4e-06, + "loss": 1.2034, + "step": 1421 + }, + { + "epoch": 8.13, + "learning_rate": 9.371428571428572e-06, + "loss": 1.1445, + "step": 1422 + }, + { + "epoch": 8.13, + "learning_rate": 9.342857142857144e-06, + "loss": 1.1803, + "step": 1423 + }, + { + "epoch": 8.14, + "learning_rate": 9.314285714285714e-06, + "loss": 1.17, + "step": 1424 + }, + { + "epoch": 8.14, + "learning_rate": 9.285714285714286e-06, + "loss": 1.1949, + "step": 1425 + }, + { + "epoch": 8.15, + "learning_rate": 9.257142857142858e-06, + "loss": 1.2051, + "step": 1426 + }, + { + "epoch": 8.15, + "learning_rate": 9.22857142857143e-06, + "loss": 1.2093, + "step": 1427 + }, + { + "epoch": 8.16, + "learning_rate": 9.2e-06, + "loss": 1.2029, + "step": 1428 + }, + { + "epoch": 8.17, + "learning_rate": 9.171428571428572e-06, + "loss": 1.1428, + "step": 1429 + }, + { + "epoch": 8.17, + "learning_rate": 9.142857142857144e-06, + "loss": 1.1722, + "step": 1430 + }, + { + "epoch": 8.18, + "learning_rate": 9.114285714285714e-06, + "loss": 1.2178, + "step": 1431 + }, + { + "epoch": 8.18, + "learning_rate": 9.085714285714286e-06, + "loss": 1.2454, + "step": 1432 + }, + { + "epoch": 8.19, + "learning_rate": 9.057142857142856e-06, + "loss": 1.1531, + "step": 1433 + }, + { + "epoch": 8.19, + "learning_rate": 9.02857142857143e-06, + "loss": 1.0534, + "step": 1434 + }, + { + "epoch": 8.2, + "learning_rate": 9e-06, + "loss": 1.2329, + "step": 1435 + }, + { + "epoch": 8.21, + "learning_rate": 8.971428571428572e-06, + "loss": 1.1228, + "step": 1436 + }, + { + "epoch": 8.21, + "learning_rate": 8.942857142857142e-06, + "loss": 1.1843, + "step": 1437 + }, + { + "epoch": 8.22, + "learning_rate": 8.914285714285716e-06, + "loss": 1.1364, + "step": 1438 + }, + { + "epoch": 8.22, + "learning_rate": 8.885714285714286e-06, + "loss": 1.1261, + "step": 1439 + }, + { + "epoch": 8.23, + "learning_rate": 8.857142857142857e-06, + "loss": 1.119, + "step": 1440 + }, + { + "epoch": 8.23, + "learning_rate": 8.828571428571429e-06, + "loss": 1.1867, + "step": 1441 + }, + { + "epoch": 8.24, + "learning_rate": 8.8e-06, + "loss": 1.0948, + "step": 1442 + }, + { + "epoch": 8.25, + "learning_rate": 8.771428571428572e-06, + "loss": 1.0973, + "step": 1443 + }, + { + "epoch": 8.25, + "learning_rate": 8.742857142857143e-06, + "loss": 1.1451, + "step": 1444 + }, + { + "epoch": 8.26, + "learning_rate": 8.714285714285715e-06, + "loss": 1.1849, + "step": 1445 + }, + { + "epoch": 8.26, + "learning_rate": 8.685714285714287e-06, + "loss": 1.1819, + "step": 1446 + }, + { + "epoch": 8.27, + "learning_rate": 8.657142857142858e-06, + "loss": 1.1684, + "step": 1447 + }, + { + "epoch": 8.27, + "learning_rate": 8.628571428571429e-06, + "loss": 1.1166, + "step": 1448 + }, + { + "epoch": 8.28, + "learning_rate": 8.599999999999999e-06, + "loss": 1.1608, + "step": 1449 + }, + { + "epoch": 8.29, + "learning_rate": 8.571428571428573e-06, + "loss": 1.1536, + "step": 1450 + }, + { + "epoch": 8.29, + "learning_rate": 8.542857142857143e-06, + "loss": 1.0794, + "step": 1451 + }, + { + "epoch": 8.3, + "learning_rate": 8.514285714285715e-06, + "loss": 1.187, + "step": 1452 + }, + { + "epoch": 8.3, + "learning_rate": 8.485714285714285e-06, + "loss": 1.1096, + "step": 1453 + }, + { + "epoch": 8.31, + "learning_rate": 8.457142857142859e-06, + "loss": 1.2126, + "step": 1454 + }, + { + "epoch": 8.31, + "learning_rate": 8.428571428571429e-06, + "loss": 1.1528, + "step": 1455 + }, + { + "epoch": 8.32, + "learning_rate": 8.400000000000001e-06, + "loss": 1.1233, + "step": 1456 + }, + { + "epoch": 8.33, + "learning_rate": 8.371428571428571e-06, + "loss": 1.1533, + "step": 1457 + }, + { + "epoch": 8.33, + "learning_rate": 8.342857142857143e-06, + "loss": 1.1637, + "step": 1458 + }, + { + "epoch": 8.34, + "learning_rate": 8.314285714285715e-06, + "loss": 1.1799, + "step": 1459 + }, + { + "epoch": 8.34, + "learning_rate": 8.285714285714285e-06, + "loss": 1.2575, + "step": 1460 + }, + { + "epoch": 8.35, + "learning_rate": 8.257142857142857e-06, + "loss": 1.1258, + "step": 1461 + }, + { + "epoch": 8.35, + "learning_rate": 8.22857142857143e-06, + "loss": 1.2118, + "step": 1462 + }, + { + "epoch": 8.36, + "learning_rate": 8.200000000000001e-06, + "loss": 1.202, + "step": 1463 + }, + { + "epoch": 8.37, + "learning_rate": 8.171428571428571e-06, + "loss": 1.1769, + "step": 1464 + }, + { + "epoch": 8.37, + "learning_rate": 8.142857142857143e-06, + "loss": 1.1704, + "step": 1465 + }, + { + "epoch": 8.38, + "learning_rate": 8.114285714285715e-06, + "loss": 1.1668, + "step": 1466 + }, + { + "epoch": 8.38, + "learning_rate": 8.085714285714287e-06, + "loss": 1.1993, + "step": 1467 + }, + { + "epoch": 8.39, + "learning_rate": 8.057142857142857e-06, + "loss": 1.1184, + "step": 1468 + }, + { + "epoch": 8.39, + "learning_rate": 8.028571428571428e-06, + "loss": 1.2608, + "step": 1469 + }, + { + "epoch": 8.4, + "learning_rate": 8.000000000000001e-06, + "loss": 1.1715, + "step": 1470 + }, + { + "epoch": 8.41, + "learning_rate": 7.971428571428572e-06, + "loss": 1.1536, + "step": 1471 + }, + { + "epoch": 8.41, + "learning_rate": 7.942857142857144e-06, + "loss": 1.1514, + "step": 1472 + }, + { + "epoch": 8.42, + "learning_rate": 7.914285714285714e-06, + "loss": 1.1995, + "step": 1473 + }, + { + "epoch": 8.42, + "learning_rate": 7.885714285714286e-06, + "loss": 1.1469, + "step": 1474 + }, + { + "epoch": 8.43, + "learning_rate": 7.857142857142858e-06, + "loss": 1.1635, + "step": 1475 + }, + { + "epoch": 8.43, + "learning_rate": 7.82857142857143e-06, + "loss": 1.0898, + "step": 1476 + }, + { + "epoch": 8.44, + "learning_rate": 7.8e-06, + "loss": 1.1769, + "step": 1477 + }, + { + "epoch": 8.45, + "learning_rate": 7.771428571428572e-06, + "loss": 1.147, + "step": 1478 + }, + { + "epoch": 8.45, + "learning_rate": 7.742857142857144e-06, + "loss": 1.2148, + "step": 1479 + }, + { + "epoch": 8.46, + "learning_rate": 7.714285714285714e-06, + "loss": 1.239, + "step": 1480 + }, + { + "epoch": 8.46, + "learning_rate": 7.685714285714286e-06, + "loss": 1.1689, + "step": 1481 + }, + { + "epoch": 8.47, + "learning_rate": 7.657142857142858e-06, + "loss": 1.1541, + "step": 1482 + }, + { + "epoch": 8.47, + "learning_rate": 7.628571428571429e-06, + "loss": 1.1101, + "step": 1483 + }, + { + "epoch": 8.48, + "learning_rate": 7.6e-06, + "loss": 1.0905, + "step": 1484 + }, + { + "epoch": 8.49, + "learning_rate": 7.571428571428572e-06, + "loss": 1.202, + "step": 1485 + }, + { + "epoch": 8.49, + "learning_rate": 7.542857142857143e-06, + "loss": 1.1799, + "step": 1486 + }, + { + "epoch": 8.5, + "learning_rate": 7.514285714285714e-06, + "loss": 1.2425, + "step": 1487 + }, + { + "epoch": 8.5, + "learning_rate": 7.485714285714286e-06, + "loss": 1.1827, + "step": 1488 + }, + { + "epoch": 8.51, + "learning_rate": 7.457142857142857e-06, + "loss": 1.1214, + "step": 1489 + }, + { + "epoch": 8.51, + "learning_rate": 7.428571428571429e-06, + "loss": 1.1768, + "step": 1490 + }, + { + "epoch": 8.52, + "learning_rate": 7.4e-06, + "loss": 1.1479, + "step": 1491 + }, + { + "epoch": 8.53, + "learning_rate": 7.371428571428572e-06, + "loss": 1.1493, + "step": 1492 + }, + { + "epoch": 8.53, + "learning_rate": 7.342857142857143e-06, + "loss": 1.1698, + "step": 1493 + }, + { + "epoch": 8.54, + "learning_rate": 7.314285714285715e-06, + "loss": 1.163, + "step": 1494 + }, + { + "epoch": 8.54, + "learning_rate": 7.285714285714286e-06, + "loss": 1.1981, + "step": 1495 + }, + { + "epoch": 8.55, + "learning_rate": 7.257142857142857e-06, + "loss": 1.1761, + "step": 1496 + }, + { + "epoch": 8.55, + "learning_rate": 7.228571428571429e-06, + "loss": 1.225, + "step": 1497 + }, + { + "epoch": 8.56, + "learning_rate": 7.2e-06, + "loss": 1.1828, + "step": 1498 + }, + { + "epoch": 8.57, + "learning_rate": 7.171428571428572e-06, + "loss": 1.1605, + "step": 1499 + }, + { + "epoch": 8.57, + "learning_rate": 7.142857142857143e-06, + "loss": 1.2068, + "step": 1500 + }, + { + "epoch": 8.58, + "learning_rate": 7.114285714285715e-06, + "loss": 1.1226, + "step": 1501 + }, + { + "epoch": 8.58, + "learning_rate": 7.085714285714286e-06, + "loss": 1.1504, + "step": 1502 + }, + { + "epoch": 8.59, + "learning_rate": 7.057142857142858e-06, + "loss": 1.2027, + "step": 1503 + }, + { + "epoch": 8.59, + "learning_rate": 7.028571428571429e-06, + "loss": 1.2045, + "step": 1504 + }, + { + "epoch": 8.6, + "learning_rate": 7.000000000000001e-06, + "loss": 1.1675, + "step": 1505 + }, + { + "epoch": 8.61, + "learning_rate": 6.971428571428572e-06, + "loss": 1.1905, + "step": 1506 + }, + { + "epoch": 8.61, + "learning_rate": 6.942857142857143e-06, + "loss": 1.159, + "step": 1507 + }, + { + "epoch": 8.62, + "learning_rate": 6.914285714285715e-06, + "loss": 1.1726, + "step": 1508 + }, + { + "epoch": 8.62, + "learning_rate": 6.885714285714286e-06, + "loss": 1.1606, + "step": 1509 + }, + { + "epoch": 8.63, + "learning_rate": 6.857142857142858e-06, + "loss": 1.143, + "step": 1510 + }, + { + "epoch": 8.63, + "learning_rate": 6.828571428571429e-06, + "loss": 1.1589, + "step": 1511 + }, + { + "epoch": 8.64, + "learning_rate": 6.800000000000001e-06, + "loss": 1.1389, + "step": 1512 + }, + { + "epoch": 8.65, + "learning_rate": 6.771428571428571e-06, + "loss": 1.2338, + "step": 1513 + }, + { + "epoch": 8.65, + "learning_rate": 6.742857142857144e-06, + "loss": 1.2066, + "step": 1514 + }, + { + "epoch": 8.66, + "learning_rate": 6.714285714285714e-06, + "loss": 1.1679, + "step": 1515 + }, + { + "epoch": 8.66, + "learning_rate": 6.685714285714285e-06, + "loss": 1.1704, + "step": 1516 + }, + { + "epoch": 8.67, + "learning_rate": 6.657142857142857e-06, + "loss": 1.0813, + "step": 1517 + }, + { + "epoch": 8.67, + "learning_rate": 6.628571428571428e-06, + "loss": 1.1983, + "step": 1518 + }, + { + "epoch": 8.68, + "learning_rate": 6.6e-06, + "loss": 1.1327, + "step": 1519 + }, + { + "epoch": 8.69, + "learning_rate": 6.5714285714285714e-06, + "loss": 1.1415, + "step": 1520 + }, + { + "epoch": 8.69, + "learning_rate": 6.542857142857143e-06, + "loss": 1.1415, + "step": 1521 + }, + { + "epoch": 8.7, + "learning_rate": 6.5142857142857145e-06, + "loss": 1.1915, + "step": 1522 + }, + { + "epoch": 8.7, + "learning_rate": 6.485714285714286e-06, + "loss": 1.2378, + "step": 1523 + }, + { + "epoch": 8.71, + "learning_rate": 6.4571428571428575e-06, + "loss": 1.1808, + "step": 1524 + }, + { + "epoch": 8.71, + "learning_rate": 6.428571428571429e-06, + "loss": 1.1028, + "step": 1525 + }, + { + "epoch": 8.72, + "learning_rate": 6.4000000000000006e-06, + "loss": 1.0962, + "step": 1526 + }, + { + "epoch": 8.73, + "learning_rate": 6.371428571428572e-06, + "loss": 1.1545, + "step": 1527 + }, + { + "epoch": 8.73, + "learning_rate": 6.342857142857144e-06, + "loss": 1.2055, + "step": 1528 + }, + { + "epoch": 8.74, + "learning_rate": 6.314285714285714e-06, + "loss": 1.154, + "step": 1529 + }, + { + "epoch": 8.74, + "learning_rate": 6.285714285714287e-06, + "loss": 1.1113, + "step": 1530 + }, + { + "epoch": 8.75, + "learning_rate": 6.257142857142857e-06, + "loss": 1.1994, + "step": 1531 + }, + { + "epoch": 8.75, + "learning_rate": 6.228571428571429e-06, + "loss": 1.1248, + "step": 1532 + }, + { + "epoch": 8.76, + "learning_rate": 6.2e-06, + "loss": 1.1793, + "step": 1533 + }, + { + "epoch": 8.77, + "learning_rate": 6.171428571428572e-06, + "loss": 1.1289, + "step": 1534 + }, + { + "epoch": 8.77, + "learning_rate": 6.142857142857143e-06, + "loss": 1.1168, + "step": 1535 + }, + { + "epoch": 8.78, + "learning_rate": 6.114285714285715e-06, + "loss": 1.2153, + "step": 1536 + }, + { + "epoch": 8.78, + "learning_rate": 6.085714285714286e-06, + "loss": 1.1003, + "step": 1537 + }, + { + "epoch": 8.79, + "learning_rate": 6.057142857142858e-06, + "loss": 1.1924, + "step": 1538 + }, + { + "epoch": 8.79, + "learning_rate": 6.028571428571428e-06, + "loss": 1.1365, + "step": 1539 + }, + { + "epoch": 8.8, + "learning_rate": 6e-06, + "loss": 1.1494, + "step": 1540 + }, + { + "epoch": 8.81, + "learning_rate": 5.971428571428571e-06, + "loss": 1.1155, + "step": 1541 + }, + { + "epoch": 8.81, + "learning_rate": 5.942857142857143e-06, + "loss": 1.2853, + "step": 1542 + }, + { + "epoch": 8.82, + "learning_rate": 5.914285714285714e-06, + "loss": 1.2212, + "step": 1543 + }, + { + "epoch": 8.82, + "learning_rate": 5.885714285714286e-06, + "loss": 1.1777, + "step": 1544 + }, + { + "epoch": 8.83, + "learning_rate": 5.857142857142857e-06, + "loss": 1.1711, + "step": 1545 + }, + { + "epoch": 8.83, + "learning_rate": 5.828571428571429e-06, + "loss": 1.1916, + "step": 1546 + }, + { + "epoch": 8.84, + "learning_rate": 5.8e-06, + "loss": 1.2016, + "step": 1547 + }, + { + "epoch": 8.85, + "learning_rate": 5.7714285714285715e-06, + "loss": 1.1548, + "step": 1548 + }, + { + "epoch": 8.85, + "learning_rate": 5.7428571428571426e-06, + "loss": 1.0878, + "step": 1549 + }, + { + "epoch": 8.86, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.1708, + "step": 1550 + }, + { + "epoch": 8.86, + "learning_rate": 5.685714285714286e-06, + "loss": 1.1818, + "step": 1551 + }, + { + "epoch": 8.87, + "learning_rate": 5.6571428571428576e-06, + "loss": 1.1823, + "step": 1552 + }, + { + "epoch": 8.87, + "learning_rate": 5.628571428571429e-06, + "loss": 1.2061, + "step": 1553 + }, + { + "epoch": 8.88, + "learning_rate": 5.600000000000001e-06, + "loss": 1.161, + "step": 1554 + }, + { + "epoch": 8.89, + "learning_rate": 5.571428571428572e-06, + "loss": 1.1154, + "step": 1555 + }, + { + "epoch": 8.89, + "learning_rate": 5.542857142857144e-06, + "loss": 1.1852, + "step": 1556 + }, + { + "epoch": 8.9, + "learning_rate": 5.514285714285715e-06, + "loss": 1.2232, + "step": 1557 + }, + { + "epoch": 8.9, + "learning_rate": 5.485714285714286e-06, + "loss": 1.1364, + "step": 1558 + }, + { + "epoch": 8.91, + "learning_rate": 5.457142857142857e-06, + "loss": 1.1736, + "step": 1559 + }, + { + "epoch": 8.91, + "learning_rate": 5.428571428571429e-06, + "loss": 1.1854, + "step": 1560 + }, + { + "epoch": 8.92, + "learning_rate": 5.4e-06, + "loss": 1.1587, + "step": 1561 + }, + { + "epoch": 8.93, + "learning_rate": 5.371428571428572e-06, + "loss": 1.1903, + "step": 1562 + }, + { + "epoch": 8.93, + "learning_rate": 5.342857142857143e-06, + "loss": 1.2218, + "step": 1563 + }, + { + "epoch": 8.94, + "learning_rate": 5.314285714285715e-06, + "loss": 1.0914, + "step": 1564 + }, + { + "epoch": 8.94, + "learning_rate": 5.285714285714286e-06, + "loss": 1.1373, + "step": 1565 + }, + { + "epoch": 8.95, + "learning_rate": 5.257142857142858e-06, + "loss": 1.1615, + "step": 1566 + }, + { + "epoch": 8.95, + "learning_rate": 5.228571428571428e-06, + "loss": 1.0707, + "step": 1567 + }, + { + "epoch": 8.96, + "learning_rate": 5.2e-06, + "loss": 1.2612, + "step": 1568 + }, + { + "epoch": 8.97, + "learning_rate": 5.171428571428571e-06, + "loss": 1.1822, + "step": 1569 + }, + { + "epoch": 8.97, + "learning_rate": 5.142857142857143e-06, + "loss": 1.2074, + "step": 1570 + }, + { + "epoch": 8.98, + "learning_rate": 5.114285714285714e-06, + "loss": 1.196, + "step": 1571 + }, + { + "epoch": 8.98, + "learning_rate": 5.085714285714286e-06, + "loss": 1.1075, + "step": 1572 + }, + { + "epoch": 8.99, + "learning_rate": 5.057142857142857e-06, + "loss": 1.0347, + "step": 1573 + }, + { + "epoch": 8.99, + "learning_rate": 5.028571428571429e-06, + "loss": 1.2361, + "step": 1574 + }, + { + "epoch": 9.0, + "learning_rate": 5e-06, + "loss": 1.1735, + "step": 1575 + }, + { + "epoch": 9.01, + "learning_rate": 4.9714285714285715e-06, + "loss": 1.1728, + "step": 1576 + }, + { + "epoch": 9.01, + "learning_rate": 4.942857142857143e-06, + "loss": 1.0923, + "step": 1577 + }, + { + "epoch": 9.02, + "learning_rate": 4.9142857142857145e-06, + "loss": 1.0906, + "step": 1578 + }, + { + "epoch": 9.02, + "learning_rate": 4.885714285714286e-06, + "loss": 1.1076, + "step": 1579 + }, + { + "epoch": 9.03, + "learning_rate": 4.857142857142858e-06, + "loss": 1.2176, + "step": 1580 + }, + { + "epoch": 9.03, + "learning_rate": 4.828571428571429e-06, + "loss": 1.1989, + "step": 1581 + }, + { + "epoch": 9.04, + "learning_rate": 4.800000000000001e-06, + "loss": 1.2039, + "step": 1582 + }, + { + "epoch": 9.05, + "learning_rate": 4.771428571428572e-06, + "loss": 1.147, + "step": 1583 + }, + { + "epoch": 9.05, + "learning_rate": 4.742857142857144e-06, + "loss": 1.1326, + "step": 1584 + }, + { + "epoch": 9.06, + "learning_rate": 4.714285714285715e-06, + "loss": 1.1783, + "step": 1585 + }, + { + "epoch": 9.06, + "learning_rate": 4.685714285714286e-06, + "loss": 1.1103, + "step": 1586 + }, + { + "epoch": 9.07, + "learning_rate": 4.657142857142857e-06, + "loss": 1.1544, + "step": 1587 + }, + { + "epoch": 9.07, + "learning_rate": 4.628571428571429e-06, + "loss": 1.1774, + "step": 1588 + }, + { + "epoch": 9.08, + "learning_rate": 4.6e-06, + "loss": 1.107, + "step": 1589 + }, + { + "epoch": 9.09, + "learning_rate": 4.571428571428572e-06, + "loss": 1.1337, + "step": 1590 + }, + { + "epoch": 9.09, + "learning_rate": 4.542857142857143e-06, + "loss": 1.1428, + "step": 1591 + }, + { + "epoch": 9.1, + "learning_rate": 4.514285714285715e-06, + "loss": 1.1712, + "step": 1592 + }, + { + "epoch": 9.1, + "learning_rate": 4.485714285714286e-06, + "loss": 1.2818, + "step": 1593 + }, + { + "epoch": 9.11, + "learning_rate": 4.457142857142858e-06, + "loss": 1.1271, + "step": 1594 + }, + { + "epoch": 9.11, + "learning_rate": 4.428571428571428e-06, + "loss": 1.2166, + "step": 1595 + }, + { + "epoch": 9.12, + "learning_rate": 4.4e-06, + "loss": 1.0997, + "step": 1596 + }, + { + "epoch": 9.13, + "learning_rate": 4.371428571428571e-06, + "loss": 1.1687, + "step": 1597 + }, + { + "epoch": 9.13, + "learning_rate": 4.342857142857143e-06, + "loss": 1.2076, + "step": 1598 + }, + { + "epoch": 9.14, + "learning_rate": 4.314285714285714e-06, + "loss": 1.0787, + "step": 1599 + }, + { + "epoch": 9.14, + "learning_rate": 4.285714285714286e-06, + "loss": 1.1682, + "step": 1600 + }, + { + "epoch": 9.15, + "learning_rate": 4.257142857142857e-06, + "loss": 1.1755, + "step": 1601 + }, + { + "epoch": 9.15, + "learning_rate": 4.228571428571429e-06, + "loss": 1.1749, + "step": 1602 + }, + { + "epoch": 9.16, + "learning_rate": 4.2000000000000004e-06, + "loss": 1.2322, + "step": 1603 + }, + { + "epoch": 9.17, + "learning_rate": 4.1714285714285715e-06, + "loss": 1.1602, + "step": 1604 + }, + { + "epoch": 9.17, + "learning_rate": 4.142857142857143e-06, + "loss": 1.1559, + "step": 1605 + }, + { + "epoch": 9.18, + "learning_rate": 4.114285714285715e-06, + "loss": 1.2234, + "step": 1606 + }, + { + "epoch": 9.18, + "learning_rate": 4.085714285714286e-06, + "loss": 1.1566, + "step": 1607 + }, + { + "epoch": 9.19, + "learning_rate": 4.057142857142858e-06, + "loss": 1.1324, + "step": 1608 + }, + { + "epoch": 9.19, + "learning_rate": 4.028571428571429e-06, + "loss": 1.2169, + "step": 1609 + }, + { + "epoch": 9.2, + "learning_rate": 4.000000000000001e-06, + "loss": 1.1903, + "step": 1610 + }, + { + "epoch": 9.21, + "learning_rate": 3.971428571428572e-06, + "loss": 1.1301, + "step": 1611 + }, + { + "epoch": 9.21, + "learning_rate": 3.942857142857143e-06, + "loss": 1.1581, + "step": 1612 + }, + { + "epoch": 9.22, + "learning_rate": 3.914285714285715e-06, + "loss": 1.1489, + "step": 1613 + }, + { + "epoch": 9.22, + "learning_rate": 3.885714285714286e-06, + "loss": 1.244, + "step": 1614 + }, + { + "epoch": 9.23, + "learning_rate": 3.857142857142857e-06, + "loss": 1.1807, + "step": 1615 + }, + { + "epoch": 9.23, + "learning_rate": 3.828571428571429e-06, + "loss": 1.098, + "step": 1616 + }, + { + "epoch": 9.24, + "learning_rate": 3.8e-06, + "loss": 1.1864, + "step": 1617 + }, + { + "epoch": 9.25, + "learning_rate": 3.7714285714285716e-06, + "loss": 1.1447, + "step": 1618 + }, + { + "epoch": 9.25, + "learning_rate": 3.742857142857143e-06, + "loss": 1.1248, + "step": 1619 + }, + { + "epoch": 9.26, + "learning_rate": 3.7142857142857146e-06, + "loss": 1.0697, + "step": 1620 + }, + { + "epoch": 9.26, + "learning_rate": 3.685714285714286e-06, + "loss": 1.1363, + "step": 1621 + }, + { + "epoch": 9.27, + "learning_rate": 3.6571428571428576e-06, + "loss": 1.128, + "step": 1622 + }, + { + "epoch": 9.27, + "learning_rate": 3.6285714285714283e-06, + "loss": 1.1087, + "step": 1623 + }, + { + "epoch": 9.28, + "learning_rate": 3.6e-06, + "loss": 1.184, + "step": 1624 + }, + { + "epoch": 9.29, + "learning_rate": 3.5714285714285714e-06, + "loss": 1.1856, + "step": 1625 + }, + { + "epoch": 9.29, + "learning_rate": 3.542857142857143e-06, + "loss": 1.1032, + "step": 1626 + }, + { + "epoch": 9.3, + "learning_rate": 3.5142857142857144e-06, + "loss": 1.1943, + "step": 1627 + }, + { + "epoch": 9.3, + "learning_rate": 3.485714285714286e-06, + "loss": 1.144, + "step": 1628 + }, + { + "epoch": 9.31, + "learning_rate": 3.4571428571428574e-06, + "loss": 1.173, + "step": 1629 + }, + { + "epoch": 9.31, + "learning_rate": 3.428571428571429e-06, + "loss": 1.1487, + "step": 1630 + }, + { + "epoch": 9.32, + "learning_rate": 3.4000000000000005e-06, + "loss": 1.1552, + "step": 1631 + }, + { + "epoch": 9.33, + "learning_rate": 3.371428571428572e-06, + "loss": 1.1868, + "step": 1632 + }, + { + "epoch": 9.33, + "learning_rate": 3.3428571428571427e-06, + "loss": 1.1577, + "step": 1633 + }, + { + "epoch": 9.34, + "learning_rate": 3.314285714285714e-06, + "loss": 1.1659, + "step": 1634 + }, + { + "epoch": 9.34, + "learning_rate": 3.2857142857142857e-06, + "loss": 1.2075, + "step": 1635 + }, + { + "epoch": 9.35, + "learning_rate": 3.2571428571428572e-06, + "loss": 1.1657, + "step": 1636 + }, + { + "epoch": 9.35, + "learning_rate": 3.2285714285714288e-06, + "loss": 1.1275, + "step": 1637 + }, + { + "epoch": 9.36, + "learning_rate": 3.2000000000000003e-06, + "loss": 1.1619, + "step": 1638 + }, + { + "epoch": 9.37, + "learning_rate": 3.171428571428572e-06, + "loss": 1.0813, + "step": 1639 + }, + { + "epoch": 9.37, + "learning_rate": 3.1428571428571433e-06, + "loss": 1.1533, + "step": 1640 + }, + { + "epoch": 9.38, + "learning_rate": 3.1142857142857144e-06, + "loss": 1.1754, + "step": 1641 + }, + { + "epoch": 9.38, + "learning_rate": 3.085714285714286e-06, + "loss": 1.1399, + "step": 1642 + }, + { + "epoch": 9.39, + "learning_rate": 3.0571428571428575e-06, + "loss": 1.1924, + "step": 1643 + }, + { + "epoch": 9.39, + "learning_rate": 3.028571428571429e-06, + "loss": 1.1303, + "step": 1644 + }, + { + "epoch": 9.4, + "learning_rate": 3e-06, + "loss": 1.2072, + "step": 1645 + }, + { + "epoch": 9.41, + "learning_rate": 2.9714285714285716e-06, + "loss": 1.1663, + "step": 1646 + }, + { + "epoch": 9.41, + "learning_rate": 2.942857142857143e-06, + "loss": 1.1577, + "step": 1647 + }, + { + "epoch": 9.42, + "learning_rate": 2.9142857142857146e-06, + "loss": 1.1738, + "step": 1648 + }, + { + "epoch": 9.42, + "learning_rate": 2.8857142857142857e-06, + "loss": 1.0478, + "step": 1649 + }, + { + "epoch": 9.43, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.0933, + "step": 1650 + }, + { + "epoch": 9.43, + "learning_rate": 2.8285714285714288e-06, + "loss": 1.1358, + "step": 1651 + }, + { + "epoch": 9.44, + "learning_rate": 2.8000000000000003e-06, + "loss": 1.094, + "step": 1652 + }, + { + "epoch": 9.45, + "learning_rate": 2.771428571428572e-06, + "loss": 1.1383, + "step": 1653 + }, + { + "epoch": 9.45, + "learning_rate": 2.742857142857143e-06, + "loss": 1.2338, + "step": 1654 + }, + { + "epoch": 9.46, + "learning_rate": 2.7142857142857144e-06, + "loss": 1.1278, + "step": 1655 + }, + { + "epoch": 9.46, + "learning_rate": 2.685714285714286e-06, + "loss": 1.123, + "step": 1656 + }, + { + "epoch": 9.47, + "learning_rate": 2.6571428571428575e-06, + "loss": 1.1989, + "step": 1657 + }, + { + "epoch": 9.47, + "learning_rate": 2.628571428571429e-06, + "loss": 1.1712, + "step": 1658 + }, + { + "epoch": 9.48, + "learning_rate": 2.6e-06, + "loss": 1.1172, + "step": 1659 + }, + { + "epoch": 9.49, + "learning_rate": 2.5714285714285716e-06, + "loss": 1.1932, + "step": 1660 + }, + { + "epoch": 9.49, + "learning_rate": 2.542857142857143e-06, + "loss": 1.0686, + "step": 1661 + }, + { + "epoch": 9.5, + "learning_rate": 2.5142857142857147e-06, + "loss": 1.1874, + "step": 1662 + }, + { + "epoch": 9.5, + "learning_rate": 2.4857142857142858e-06, + "loss": 1.1438, + "step": 1663 + }, + { + "epoch": 9.51, + "learning_rate": 2.4571428571428573e-06, + "loss": 1.179, + "step": 1664 + }, + { + "epoch": 9.51, + "learning_rate": 2.428571428571429e-06, + "loss": 1.1445, + "step": 1665 + }, + { + "epoch": 9.52, + "learning_rate": 2.4000000000000003e-06, + "loss": 1.0935, + "step": 1666 + }, + { + "epoch": 9.53, + "learning_rate": 2.371428571428572e-06, + "loss": 1.2171, + "step": 1667 + }, + { + "epoch": 9.53, + "learning_rate": 2.342857142857143e-06, + "loss": 1.2176, + "step": 1668 + }, + { + "epoch": 9.54, + "learning_rate": 2.3142857142857145e-06, + "loss": 1.0836, + "step": 1669 + }, + { + "epoch": 9.54, + "learning_rate": 2.285714285714286e-06, + "loss": 1.1739, + "step": 1670 + }, + { + "epoch": 9.55, + "learning_rate": 2.2571428571428575e-06, + "loss": 1.1265, + "step": 1671 + }, + { + "epoch": 9.55, + "learning_rate": 2.228571428571429e-06, + "loss": 1.1278, + "step": 1672 + }, + { + "epoch": 9.56, + "learning_rate": 2.2e-06, + "loss": 1.1497, + "step": 1673 + }, + { + "epoch": 9.57, + "learning_rate": 2.1714285714285716e-06, + "loss": 1.1593, + "step": 1674 + }, + { + "epoch": 9.57, + "learning_rate": 2.142857142857143e-06, + "loss": 1.0416, + "step": 1675 + }, + { + "epoch": 9.58, + "learning_rate": 2.1142857142857147e-06, + "loss": 1.19, + "step": 1676 + }, + { + "epoch": 9.58, + "learning_rate": 2.0857142857142858e-06, + "loss": 1.1379, + "step": 1677 + }, + { + "epoch": 9.59, + "learning_rate": 2.0571428571428573e-06, + "loss": 1.1397, + "step": 1678 + }, + { + "epoch": 9.59, + "learning_rate": 2.028571428571429e-06, + "loss": 1.1533, + "step": 1679 + }, + { + "epoch": 9.6, + "learning_rate": 2.0000000000000003e-06, + "loss": 1.1168, + "step": 1680 + }, + { + "epoch": 9.61, + "learning_rate": 1.9714285714285714e-06, + "loss": 1.1514, + "step": 1681 + }, + { + "epoch": 9.61, + "learning_rate": 1.942857142857143e-06, + "loss": 1.143, + "step": 1682 + }, + { + "epoch": 9.62, + "learning_rate": 1.9142857142857145e-06, + "loss": 1.2329, + "step": 1683 + }, + { + "epoch": 9.62, + "learning_rate": 1.8857142857142858e-06, + "loss": 1.1127, + "step": 1684 + }, + { + "epoch": 9.63, + "learning_rate": 1.8571428571428573e-06, + "loss": 1.1063, + "step": 1685 + }, + { + "epoch": 9.63, + "learning_rate": 1.8285714285714288e-06, + "loss": 1.1945, + "step": 1686 + }, + { + "epoch": 9.64, + "learning_rate": 1.8e-06, + "loss": 1.1076, + "step": 1687 + }, + { + "epoch": 9.65, + "learning_rate": 1.7714285714285714e-06, + "loss": 1.1526, + "step": 1688 + }, + { + "epoch": 9.65, + "learning_rate": 1.742857142857143e-06, + "loss": 1.1742, + "step": 1689 + }, + { + "epoch": 9.66, + "learning_rate": 1.7142857142857145e-06, + "loss": 1.1234, + "step": 1690 + }, + { + "epoch": 9.66, + "learning_rate": 1.685714285714286e-06, + "loss": 1.1422, + "step": 1691 + }, + { + "epoch": 9.67, + "learning_rate": 1.657142857142857e-06, + "loss": 1.1065, + "step": 1692 + }, + { + "epoch": 9.67, + "learning_rate": 1.6285714285714286e-06, + "loss": 1.2324, + "step": 1693 + }, + { + "epoch": 9.68, + "learning_rate": 1.6000000000000001e-06, + "loss": 1.1745, + "step": 1694 + }, + { + "epoch": 9.69, + "learning_rate": 1.5714285714285717e-06, + "loss": 1.1504, + "step": 1695 + }, + { + "epoch": 9.69, + "learning_rate": 1.542857142857143e-06, + "loss": 1.164, + "step": 1696 + }, + { + "epoch": 9.7, + "learning_rate": 1.5142857142857145e-06, + "loss": 1.147, + "step": 1697 + }, + { + "epoch": 9.7, + "learning_rate": 1.4857142857142858e-06, + "loss": 1.1331, + "step": 1698 + }, + { + "epoch": 9.71, + "learning_rate": 1.4571428571428573e-06, + "loss": 1.1321, + "step": 1699 + }, + { + "epoch": 9.71, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.1113, + "step": 1700 + }, + { + "epoch": 9.72, + "learning_rate": 1.4000000000000001e-06, + "loss": 1.0999, + "step": 1701 + }, + { + "epoch": 9.73, + "learning_rate": 1.3714285714285715e-06, + "loss": 1.1162, + "step": 1702 + }, + { + "epoch": 9.73, + "learning_rate": 1.342857142857143e-06, + "loss": 1.1185, + "step": 1703 + }, + { + "epoch": 9.74, + "learning_rate": 1.3142857142857145e-06, + "loss": 1.1674, + "step": 1704 + }, + { + "epoch": 9.74, + "learning_rate": 1.2857142857142858e-06, + "loss": 1.1942, + "step": 1705 + }, + { + "epoch": 9.75, + "learning_rate": 1.2571428571428573e-06, + "loss": 1.1559, + "step": 1706 + }, + { + "epoch": 9.75, + "learning_rate": 1.2285714285714286e-06, + "loss": 1.1529, + "step": 1707 + }, + { + "epoch": 9.76, + "learning_rate": 1.2000000000000002e-06, + "loss": 1.1976, + "step": 1708 + }, + { + "epoch": 9.77, + "learning_rate": 1.1714285714285715e-06, + "loss": 1.1535, + "step": 1709 + }, + { + "epoch": 9.77, + "learning_rate": 1.142857142857143e-06, + "loss": 1.1287, + "step": 1710 + }, + { + "epoch": 9.78, + "learning_rate": 1.1142857142857145e-06, + "loss": 1.2045, + "step": 1711 + }, + { + "epoch": 9.78, + "learning_rate": 1.0857142857142858e-06, + "loss": 1.1061, + "step": 1712 + }, + { + "epoch": 9.79, + "learning_rate": 1.0571428571428573e-06, + "loss": 1.1602, + "step": 1713 + }, + { + "epoch": 9.79, + "learning_rate": 1.0285714285714286e-06, + "loss": 1.1806, + "step": 1714 + }, + { + "epoch": 9.8, + "learning_rate": 1.0000000000000002e-06, + "loss": 1.1134, + "step": 1715 + }, + { + "epoch": 9.81, + "learning_rate": 9.714285714285715e-07, + "loss": 1.0925, + "step": 1716 + }, + { + "epoch": 9.81, + "learning_rate": 9.428571428571429e-07, + "loss": 1.1441, + "step": 1717 + }, + { + "epoch": 9.82, + "learning_rate": 9.142857142857144e-07, + "loss": 1.1646, + "step": 1718 + }, + { + "epoch": 9.82, + "learning_rate": 8.857142857142857e-07, + "loss": 1.2171, + "step": 1719 + }, + { + "epoch": 9.83, + "learning_rate": 8.571428571428572e-07, + "loss": 1.1863, + "step": 1720 + }, + { + "epoch": 9.83, + "learning_rate": 8.285714285714285e-07, + "loss": 1.1377, + "step": 1721 + }, + { + "epoch": 9.84, + "learning_rate": 8.000000000000001e-07, + "loss": 1.1671, + "step": 1722 + }, + { + "epoch": 9.85, + "learning_rate": 7.714285714285715e-07, + "loss": 1.1242, + "step": 1723 + }, + { + "epoch": 9.85, + "learning_rate": 7.428571428571429e-07, + "loss": 1.192, + "step": 1724 + }, + { + "epoch": 9.86, + "learning_rate": 7.142857142857143e-07, + "loss": 1.1033, + "step": 1725 + }, + { + "epoch": 9.86, + "learning_rate": 6.857142857142857e-07, + "loss": 1.1654, + "step": 1726 + }, + { + "epoch": 9.87, + "learning_rate": 6.571428571428572e-07, + "loss": 1.2146, + "step": 1727 + }, + { + "epoch": 9.87, + "learning_rate": 6.285714285714287e-07, + "loss": 1.0838, + "step": 1728 + }, + { + "epoch": 9.88, + "learning_rate": 6.000000000000001e-07, + "loss": 1.1062, + "step": 1729 + }, + { + "epoch": 9.89, + "learning_rate": 5.714285714285715e-07, + "loss": 1.1258, + "step": 1730 + }, + { + "epoch": 9.89, + "learning_rate": 5.428571428571429e-07, + "loss": 1.1799, + "step": 1731 + }, + { + "epoch": 9.9, + "learning_rate": 5.142857142857143e-07, + "loss": 1.1261, + "step": 1732 + }, + { + "epoch": 9.9, + "learning_rate": 4.857142857142857e-07, + "loss": 1.1668, + "step": 1733 + }, + { + "epoch": 9.91, + "learning_rate": 4.571428571428572e-07, + "loss": 1.0714, + "step": 1734 + }, + { + "epoch": 9.91, + "learning_rate": 4.285714285714286e-07, + "loss": 1.1479, + "step": 1735 + }, + { + "epoch": 9.92, + "learning_rate": 4.0000000000000003e-07, + "loss": 1.2092, + "step": 1736 + }, + { + "epoch": 9.93, + "learning_rate": 3.7142857142857145e-07, + "loss": 1.1544, + "step": 1737 + }, + { + "epoch": 9.93, + "learning_rate": 3.4285714285714286e-07, + "loss": 1.1102, + "step": 1738 + }, + { + "epoch": 9.94, + "learning_rate": 3.1428571428571433e-07, + "loss": 1.162, + "step": 1739 + }, + { + "epoch": 9.94, + "learning_rate": 2.8571428571428575e-07, + "loss": 1.1376, + "step": 1740 + }, + { + "epoch": 9.95, + "learning_rate": 2.5714285714285716e-07, + "loss": 1.1805, + "step": 1741 + }, + { + "epoch": 9.95, + "learning_rate": 2.285714285714286e-07, + "loss": 1.1403, + "step": 1742 + }, + { + "epoch": 9.96, + "learning_rate": 2.0000000000000002e-07, + "loss": 1.1657, + "step": 1743 + }, + { + "epoch": 9.97, + "learning_rate": 1.7142857142857143e-07, + "loss": 1.1106, + "step": 1744 + }, + { + "epoch": 9.97, + "learning_rate": 1.4285714285714287e-07, + "loss": 1.256, + "step": 1745 + }, + { + "epoch": 9.98, + "learning_rate": 1.142857142857143e-07, + "loss": 1.1122, + "step": 1746 + }, + { + "epoch": 9.98, + "learning_rate": 8.571428571428572e-08, + "loss": 1.1415, + "step": 1747 + }, + { + "epoch": 9.99, + "learning_rate": 5.714285714285715e-08, + "loss": 1.1179, + "step": 1748 + }, + { + "epoch": 9.99, + "learning_rate": 2.8571428571428575e-08, + "loss": 1.1105, + "step": 1749 + }, + { + "epoch": 10.0, + "learning_rate": 0.0, + "loss": 1.1718, + "step": 1750 + }, + { + "epoch": 10.0, + "step": 1750, + "total_flos": 4.233246129782784e+16, + "train_loss": 1.389875247819083, + "train_runtime": 2378.4813, + "train_samples_per_second": 26.366, + "train_steps_per_second": 0.736 } ], - "max_steps": 786, - "num_train_epochs": 3, - "total_flos": 1.2699738389348352e+16, + "max_steps": 1750, + "num_train_epochs": 10, + "total_flos": 4.233246129782784e+16, "trial_name": null, "trial_params": null }