diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.9995796553173603, - "global_step": 1189, + "epoch": 0.9987389659520807, + "global_step": 594, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -10,7166 +10,3587 @@ { "epoch": 0.0, "learning_rate": 0.0, - "loss": 9.1784, + "loss": 9.2391, "step": 1 }, { "epoch": 0.0, "learning_rate": 0.0, - "loss": 8.5725, + "loss": 9.104, "step": 2 }, { - "epoch": 0.0, - "learning_rate": 2e-06, - "loss": 9.3285, + "epoch": 0.01, + "learning_rate": 0.0, + "loss": 8.9021, "step": 3 }, { - "epoch": 0.0, + "epoch": 0.01, "learning_rate": 2e-06, - "loss": 9.1371, + "loss": 8.8682, "step": 4 }, { - "epoch": 0.0, + "epoch": 0.01, "learning_rate": 4e-06, - "loss": 8.9227, + "loss": 8.8262, "step": 5 }, { "epoch": 0.01, "learning_rate": 6e-06, - "loss": 8.7957, + "loss": 8.4047, "step": 6 }, { "epoch": 0.01, "learning_rate": 8e-06, - "loss": 8.6135, + "loss": 8.3502, "step": 7 }, { "epoch": 0.01, "learning_rate": 1e-05, - "loss": 8.479, + "loss": 7.9771, "step": 8 }, { - "epoch": 0.01, + "epoch": 0.02, "learning_rate": 1.2e-05, - "loss": 8.2944, + "loss": 7.8414, "step": 9 }, { - "epoch": 0.01, + "epoch": 0.02, "learning_rate": 1.4e-05, - "loss": 8.06, + "loss": 7.6606, "step": 10 }, { - "epoch": 0.01, + "epoch": 0.02, "learning_rate": 1.6e-05, - "loss": 7.929, + "loss": 7.5532, "step": 11 }, { - "epoch": 0.01, + "epoch": 0.02, "learning_rate": 1.8e-05, - "loss": 7.7736, + "loss": 7.5309, "step": 12 }, { - "epoch": 0.01, + "epoch": 0.02, "learning_rate": 2e-05, - "loss": 7.5648, + "loss": 7.3389, "step": 13 }, { - "epoch": 0.01, + "epoch": 0.02, "learning_rate": 2.2e-05, - "loss": 7.6847, + "loss": 7.5473, "step": 14 }, { - "epoch": 0.01, + "epoch": 0.03, "learning_rate": 2.4e-05, - "loss": 7.9066, + "loss": 7.3029, "step": 15 }, { - "epoch": 0.01, + "epoch": 0.03, "learning_rate": 2.6e-05, - "loss": 7.4937, + "loss": 7.3217, "step": 16 }, { - "epoch": 0.01, + "epoch": 0.03, "learning_rate": 2.8e-05, - "loss": 7.4857, + "loss": 7.3244, "step": 17 }, { - "epoch": 0.02, + "epoch": 0.03, "learning_rate": 3e-05, - "loss": 7.3758, + "loss": 7.2319, "step": 18 }, { - "epoch": 0.02, + "epoch": 0.03, "learning_rate": 3.2e-05, - "loss": 7.2432, + "loss": 7.1778, "step": 19 }, { - "epoch": 0.02, + "epoch": 0.03, "learning_rate": 3.4000000000000007e-05, - "loss": 7.4421, + "loss": 7.257, "step": 20 }, { - "epoch": 0.02, + "epoch": 0.04, "learning_rate": 3.6e-05, - "loss": 7.2182, + "loss": 7.3211, "step": 21 }, { - "epoch": 0.02, + "epoch": 0.04, "learning_rate": 3.8e-05, - "loss": 7.3485, + "loss": 7.2754, "step": 22 }, { - "epoch": 0.02, + "epoch": 0.04, "learning_rate": 4e-05, - "loss": 7.3024, + "loss": 7.268, "step": 23 }, { - "epoch": 0.02, + "epoch": 0.04, "learning_rate": 4.2000000000000004e-05, - "loss": 7.2753, + "loss": 7.2223, "step": 24 }, { - "epoch": 0.02, + "epoch": 0.04, "learning_rate": 4.4e-05, - "loss": 7.1561, + "loss": 7.0232, "step": 25 }, { - "epoch": 0.02, + "epoch": 0.04, "learning_rate": 4.6e-05, - "loss": 7.3336, + "loss": 7.377, "step": 26 }, { - "epoch": 0.02, + "epoch": 0.05, "learning_rate": 4.8e-05, - "loss": 7.1281, + "loss": 7.1471, "step": 27 }, { - "epoch": 0.02, + "epoch": 0.05, "learning_rate": 5e-05, - "loss": 7.0395, + "loss": 7.0311, "step": 28 }, { - "epoch": 0.02, + "epoch": 0.05, "learning_rate": 5.2e-05, - "loss": 7.2563, + "loss": 7.1356, "step": 29 }, { - "epoch": 0.03, + "epoch": 0.05, "learning_rate": 5.4e-05, - "loss": 7.3104, + "loss": 7.1118, "step": 30 }, { - "epoch": 0.03, + "epoch": 0.05, "learning_rate": 5.6e-05, - "loss": 7.1971, + "loss": 7.2032, "step": 31 }, { - "epoch": 0.03, + "epoch": 0.05, "learning_rate": 5.800000000000001e-05, - "loss": 7.1307, + "loss": 7.1625, "step": 32 }, { - "epoch": 0.03, + "epoch": 0.06, "learning_rate": 6e-05, - "loss": 7.3249, + "loss": 7.0952, "step": 33 }, { - "epoch": 0.03, + "epoch": 0.06, "learning_rate": 6.2e-05, - "loss": 6.9528, + "loss": 7.0539, "step": 34 }, { - "epoch": 0.03, + "epoch": 0.06, "learning_rate": 6.4e-05, - "loss": 7.0472, + "loss": 7.0215, "step": 35 }, { - "epoch": 0.03, + "epoch": 0.06, "learning_rate": 6.6e-05, - "loss": 7.2907, + "loss": 7.1635, "step": 36 }, { - "epoch": 0.03, + "epoch": 0.06, "learning_rate": 6.800000000000001e-05, - "loss": 7.1711, + "loss": 7.118, "step": 37 }, { - "epoch": 0.03, + "epoch": 0.06, "learning_rate": 7.000000000000001e-05, - "loss": 7.1423, + "loss": 7.1143, "step": 38 }, { - "epoch": 0.03, + "epoch": 0.07, "learning_rate": 7.2e-05, - "loss": 7.2415, + "loss": 7.1712, "step": 39 }, { - "epoch": 0.03, + "epoch": 0.07, "learning_rate": 7.4e-05, - "loss": 7.0673, + "loss": 7.0529, "step": 40 }, { - "epoch": 0.03, + "epoch": 0.07, "learning_rate": 7.6e-05, - "loss": 7.0163, + "loss": 7.0855, "step": 41 }, { - "epoch": 0.04, + "epoch": 0.07, "learning_rate": 7.8e-05, - "loss": 7.1791, + "loss": 7.1638, "step": 42 }, { - "epoch": 0.04, + "epoch": 0.07, "learning_rate": 8e-05, - "loss": 7.0275, + "loss": 6.9741, "step": 43 }, { - "epoch": 0.04, + "epoch": 0.07, "learning_rate": 8.2e-05, - "loss": 7.0427, + "loss": 7.0521, "step": 44 }, { - "epoch": 0.04, + "epoch": 0.08, "learning_rate": 8.400000000000001e-05, - "loss": 7.1336, + "loss": 7.1212, "step": 45 }, { - "epoch": 0.04, + "epoch": 0.08, "learning_rate": 8.599999999999999e-05, - "loss": 7.0856, + "loss": 7.0775, "step": 46 }, { - "epoch": 0.04, + "epoch": 0.08, "learning_rate": 8.8e-05, - "loss": 7.0557, + "loss": 6.9064, "step": 47 }, { - "epoch": 0.04, + "epoch": 0.08, "learning_rate": 8.999999999999999e-05, - "loss": 7.0549, + "loss": 7.0588, "step": 48 }, { - "epoch": 0.04, + "epoch": 0.08, "learning_rate": 9.2e-05, - "loss": 6.824, + "loss": 6.7153, "step": 49 }, { - "epoch": 0.04, + "epoch": 0.08, "learning_rate": 9.400000000000001e-05, - "loss": 6.6725, + "loss": 6.5244, "step": 50 }, { - "epoch": 0.04, + "epoch": 0.09, "learning_rate": 9.6e-05, - "loss": 7.4345, + "loss": 7.3138, "step": 51 }, { - "epoch": 0.04, + "epoch": 0.09, "learning_rate": 9.800000000000001e-05, - "loss": 7.1991, + "loss": 7.2796, "step": 52 }, { - "epoch": 0.04, + "epoch": 0.09, "learning_rate": 0.0001, - "loss": 7.0812, + "loss": 7.0741, "step": 53 }, { - "epoch": 0.05, + "epoch": 0.09, "learning_rate": 0.000102, - "loss": 7.1414, + "loss": 7.1467, "step": 54 }, { - "epoch": 0.05, + "epoch": 0.09, "learning_rate": 0.000104, - "loss": 7.2373, + "loss": 7.0, "step": 55 }, { - "epoch": 0.05, + "epoch": 0.09, "learning_rate": 0.000106, - "loss": 6.9596, + "loss": 7.1375, "step": 56 }, { - "epoch": 0.05, + "epoch": 0.1, "learning_rate": 0.000108, - "loss": 7.3832, + "loss": 6.9579, "step": 57 }, { - "epoch": 0.05, + "epoch": 0.1, "learning_rate": 0.00011, - "loss": 6.9846, + "loss": 7.1664, "step": 58 }, { - "epoch": 0.05, + "epoch": 0.1, "learning_rate": 0.000112, - "loss": 6.976, + "loss": 7.1162, "step": 59 }, { - "epoch": 0.05, + "epoch": 0.1, "learning_rate": 0.000114, - "loss": 7.1303, + "loss": 6.9863, "step": 60 }, { - "epoch": 0.05, + "epoch": 0.1, "learning_rate": 0.00011600000000000001, - "loss": 6.9403, + "loss": 7.1279, "step": 61 }, { - "epoch": 0.05, + "epoch": 0.1, "learning_rate": 0.000118, - "loss": 7.0249, + "loss": 7.0931, "step": 62 }, { - "epoch": 0.05, + "epoch": 0.11, "learning_rate": 0.00012, - "loss": 7.1265, + "loss": 7.0619, "step": 63 }, { - "epoch": 0.05, + "epoch": 0.11, "learning_rate": 0.000122, - "loss": 7.0689, + "loss": 7.0442, "step": 64 }, { - "epoch": 0.05, + "epoch": 0.11, "learning_rate": 0.000124, - "loss": 7.0175, + "loss": 7.0437, "step": 65 }, { - "epoch": 0.06, + "epoch": 0.11, "learning_rate": 0.000126, - "loss": 7.0994, + "loss": 7.0038, "step": 66 }, { - "epoch": 0.06, + "epoch": 0.11, "learning_rate": 0.000128, - "loss": 7.0589, + "loss": 7.05, "step": 67 }, { - "epoch": 0.06, + "epoch": 0.11, "learning_rate": 0.00013000000000000002, - "loss": 6.9998, + "loss": 6.863, "step": 68 }, { - "epoch": 0.06, + "epoch": 0.12, "learning_rate": 0.000132, - "loss": 6.9596, + "loss": 7.0607, "step": 69 }, { - "epoch": 0.06, + "epoch": 0.12, "learning_rate": 0.000134, - "loss": 7.0045, + "loss": 7.0143, "step": 70 }, { - "epoch": 0.06, + "epoch": 0.12, "learning_rate": 0.00013600000000000003, - "loss": 7.1545, + "loss": 7.041, "step": 71 }, { - "epoch": 0.06, + "epoch": 0.12, "learning_rate": 0.00013800000000000002, - "loss": 7.2349, + "loss": 7.151, "step": 72 }, { - "epoch": 0.06, + "epoch": 0.12, "learning_rate": 0.00014000000000000001, - "loss": 7.0776, + "loss": 6.8324, "step": 73 }, { - "epoch": 0.06, + "epoch": 0.12, "learning_rate": 0.00014199999999999998, - "loss": 7.1223, + "loss": 6.9986, "step": 74 }, { - "epoch": 0.06, + "epoch": 0.13, "learning_rate": 0.000144, - "loss": 6.8991, + "loss": 7.0604, "step": 75 }, { - "epoch": 0.06, + "epoch": 0.13, "learning_rate": 0.000146, - "loss": 7.2667, + "loss": 7.0742, "step": 76 }, { - "epoch": 0.06, + "epoch": 0.13, "learning_rate": 0.000148, - "loss": 7.0611, + "loss": 7.0835, "step": 77 }, { - "epoch": 0.07, + "epoch": 0.13, "learning_rate": 0.00015, - "loss": 6.8979, + "loss": 7.0913, "step": 78 }, { - "epoch": 0.07, + "epoch": 0.13, "learning_rate": 0.000152, - "loss": 6.927, + "loss": 6.9772, "step": 79 }, { - "epoch": 0.07, + "epoch": 0.13, "learning_rate": 0.000154, - "loss": 7.0384, + "loss": 7.025, "step": 80 }, { - "epoch": 0.07, + "epoch": 0.14, "learning_rate": 0.000156, - "loss": 7.1154, + "loss": 7.0169, "step": 81 }, { - "epoch": 0.07, + "epoch": 0.14, "learning_rate": 0.000158, - "loss": 7.2001, + "loss": 7.0588, "step": 82 }, { - "epoch": 0.07, + "epoch": 0.14, "learning_rate": 0.00016, - "loss": 6.965, + "loss": 7.0161, "step": 83 }, { - "epoch": 0.07, + "epoch": 0.14, "learning_rate": 0.000162, - "loss": 7.1127, + "loss": 7.0474, "step": 84 }, { - "epoch": 0.07, + "epoch": 0.14, "learning_rate": 0.000164, - "loss": 7.0066, + "loss": 6.9451, "step": 85 }, { - "epoch": 0.07, + "epoch": 0.14, "learning_rate": 0.00016600000000000002, - "loss": 7.076, + "loss": 6.8814, "step": 86 }, { - "epoch": 0.07, + "epoch": 0.15, "learning_rate": 0.00016800000000000002, - "loss": 6.9905, + "loss": 7.0686, "step": 87 }, { - "epoch": 0.07, + "epoch": 0.15, "learning_rate": 0.00017, - "loss": 7.0706, + "loss": 7.0055, "step": 88 }, { - "epoch": 0.07, + "epoch": 0.15, "learning_rate": 0.00017199999999999998, - "loss": 7.2036, + "loss": 6.9607, "step": 89 }, { - "epoch": 0.08, + "epoch": 0.15, "learning_rate": 0.000174, - "loss": 7.0382, + "loss": 7.0966, "step": 90 }, { - "epoch": 0.08, + "epoch": 0.15, "learning_rate": 0.000176, - "loss": 7.2465, + "loss": 6.9914, "step": 91 }, { - "epoch": 0.08, + "epoch": 0.15, "learning_rate": 0.000178, - "loss": 7.1257, + "loss": 7.0491, "step": 92 }, { - "epoch": 0.08, + "epoch": 0.16, "learning_rate": 0.00017999999999999998, - "loss": 6.8238, + "loss": 6.9837, "step": 93 }, { - "epoch": 0.08, + "epoch": 0.16, "learning_rate": 0.000182, - "loss": 7.1199, + "loss": 6.9912, "step": 94 }, { - "epoch": 0.08, + "epoch": 0.16, "learning_rate": 0.000184, - "loss": 7.2177, + "loss": 6.8828, "step": 95 }, { - "epoch": 0.08, + "epoch": 0.16, "learning_rate": 0.000186, - "loss": 6.8903, + "loss": 6.8851, "step": 96 }, { - "epoch": 0.08, + "epoch": 0.16, "learning_rate": 0.00018800000000000002, - "loss": 7.0236, + "loss": 6.9958, "step": 97 }, { - "epoch": 0.08, + "epoch": 0.16, "learning_rate": 0.00019, - "loss": 6.9423, + "loss": 6.6052, "step": 98 }, { - "epoch": 0.08, + "epoch": 0.17, "learning_rate": 0.000192, - "loss": 6.6384, + "loss": 6.8389, "step": 99 }, { - "epoch": 0.08, + "epoch": 0.17, "learning_rate": 0.000194, - "loss": 6.4692, + "loss": 6.4098, "step": 100 }, { - "epoch": 0.08, + "epoch": 0.17, "learning_rate": 0.00019600000000000002, - "loss": 7.0916, + "loss": 7.1685, "step": 101 }, { - "epoch": 0.09, + "epoch": 0.17, "learning_rate": 0.00019800000000000002, - "loss": 7.0911, + "loss": 7.0531, "step": 102 }, { - "epoch": 0.09, + "epoch": 0.17, "learning_rate": 0.0002, - "loss": 7.0966, + "loss": 7.0706, "step": 103 }, { - "epoch": 0.09, + "epoch": 0.17, "learning_rate": 0.000202, - "loss": 7.0376, + "loss": 6.9564, "step": 104 }, { - "epoch": 0.09, + "epoch": 0.18, "learning_rate": 0.000204, - "loss": 6.9953, + "loss": 6.9889, "step": 105 }, { - "epoch": 0.09, + "epoch": 0.18, "learning_rate": 0.000206, - "loss": 7.1316, + "loss": 7.0398, "step": 106 }, { - "epoch": 0.09, + "epoch": 0.18, "learning_rate": 0.000208, - "loss": 7.0401, + "loss": 7.1539, "step": 107 }, { - "epoch": 0.09, + "epoch": 0.18, "learning_rate": 0.00021, - "loss": 7.1044, + "loss": 7.0754, "step": 108 }, { - "epoch": 0.09, + "epoch": 0.18, "learning_rate": 0.000212, - "loss": 7.2254, + "loss": 7.0787, "step": 109 }, { - "epoch": 0.09, + "epoch": 0.18, "learning_rate": 0.000214, - "loss": 7.1079, + "loss": 7.044, "step": 110 }, { - "epoch": 0.09, + "epoch": 0.19, "learning_rate": 0.000216, - "loss": 7.1528, + "loss": 6.9558, "step": 111 }, { - "epoch": 0.09, + "epoch": 0.19, "learning_rate": 0.000218, - "loss": 7.1297, + "loss": 6.9957, "step": 112 }, { - "epoch": 0.09, + "epoch": 0.19, "learning_rate": 0.00022, - "loss": 7.0263, + "loss": 6.9355, "step": 113 }, { - "epoch": 0.1, + "epoch": 0.19, "learning_rate": 0.000222, - "loss": 22.7629, + "loss": 7.0677, "step": 114 }, { - "epoch": 0.1, + "epoch": 0.19, "learning_rate": 0.000224, - "loss": 14.4324, + "loss": 7.0284, "step": 115 }, { - "epoch": 0.1, - "learning_rate": 0.000224, - "loss": 15.1333, + "epoch": 0.2, + "learning_rate": 0.00022600000000000002, + "loss": 6.9798, "step": 116 }, { - "epoch": 0.1, - "learning_rate": 0.000224, - "loss": 15.5222, + "epoch": 0.2, + "learning_rate": 0.000228, + "loss": 7.0059, "step": 117 }, { - "epoch": 0.1, - "learning_rate": 0.000224, - "loss": 15.1973, + "epoch": 0.2, + "learning_rate": 0.00023, + "loss": 6.9782, "step": 118 }, { - "epoch": 0.1, - "learning_rate": 0.000224, - "loss": 15.3933, + "epoch": 0.2, + "learning_rate": 0.00023200000000000003, + "loss": 7.0379, "step": 119 }, { - "epoch": 0.1, - "learning_rate": 0.00022600000000000002, - "loss": 15.2458, + "epoch": 0.2, + "learning_rate": 0.00023400000000000002, + "loss": 7.0151, "step": 120 }, { - "epoch": 0.1, - "learning_rate": 0.000228, - "loss": 7.3314, + "epoch": 0.2, + "learning_rate": 0.000236, + "loss": 7.1422, "step": 121 }, { - "epoch": 0.1, - "learning_rate": 0.00023, - "loss": 7.0395, + "epoch": 0.21, + "learning_rate": 0.00023799999999999998, + "loss": 7.1631, "step": 122 }, { - "epoch": 0.1, - "learning_rate": 0.00023200000000000003, - "loss": 7.1346, + "epoch": 0.21, + "learning_rate": 0.00024, + "loss": 7.0311, "step": 123 }, { - "epoch": 0.1, - "learning_rate": 0.00023400000000000002, - "loss": 7.0888, + "epoch": 0.21, + "learning_rate": 0.000242, + "loss": 7.0971, "step": 124 }, { - "epoch": 0.11, - "learning_rate": 0.000236, - "loss": 7.0469, + "epoch": 0.21, + "learning_rate": 0.000244, + "loss": 6.9513, "step": 125 }, { - "epoch": 0.11, - "learning_rate": 0.00023799999999999998, - "loss": 6.9381, + "epoch": 0.21, + "learning_rate": 0.000246, + "loss": 7.0559, "step": 126 }, { - "epoch": 0.11, - "learning_rate": 0.00024, - "loss": 7.252, + "epoch": 0.21, + "learning_rate": 0.000248, + "loss": 6.963, "step": 127 }, { - "epoch": 0.11, - "learning_rate": 0.000242, - "loss": 6.9697, + "epoch": 0.22, + "learning_rate": 0.00025, + "loss": 7.0279, "step": 128 }, { - "epoch": 0.11, - "learning_rate": 0.000244, - "loss": 7.0154, + "epoch": 0.22, + "learning_rate": 0.000252, + "loss": 7.0081, "step": 129 }, { - "epoch": 0.11, - "learning_rate": 0.000246, - "loss": 6.9832, + "epoch": 0.22, + "learning_rate": 0.000254, + "loss": 7.1089, "step": 130 }, { - "epoch": 0.11, - "learning_rate": 0.000248, - "loss": 6.8832, + "epoch": 0.22, + "learning_rate": 0.000256, + "loss": 7.0063, "step": 131 }, { - "epoch": 0.11, - "learning_rate": 0.00025, - "loss": 7.0214, + "epoch": 0.22, + "learning_rate": 0.00025800000000000004, + "loss": 6.9755, "step": 132 }, { - "epoch": 0.11, - "learning_rate": 0.000252, - "loss": 7.2379, + "epoch": 0.22, + "learning_rate": 0.00026000000000000003, + "loss": 7.0143, "step": 133 }, { - "epoch": 0.11, - "learning_rate": 0.000254, - "loss": 6.9883, + "epoch": 0.23, + "learning_rate": 0.000262, + "loss": 7.1515, "step": 134 }, { - "epoch": 0.11, - "learning_rate": 0.000256, - "loss": 7.0613, + "epoch": 0.23, + "learning_rate": 0.000264, + "loss": 7.0614, "step": 135 }, { - "epoch": 0.11, - "learning_rate": 0.00025800000000000004, - "loss": 6.9666, + "epoch": 0.23, + "learning_rate": 0.000266, + "loss": 7.1099, "step": 136 }, { - "epoch": 0.12, - "learning_rate": 0.00026000000000000003, - "loss": 6.9942, + "epoch": 0.23, + "learning_rate": 0.000268, + "loss": 6.9085, "step": 137 }, { - "epoch": 0.12, - "learning_rate": 0.000262, - "loss": 7.0373, + "epoch": 0.23, + "learning_rate": 0.00027, + "loss": 7.0223, "step": 138 }, { - "epoch": 0.12, - "learning_rate": 0.000264, - "loss": 6.9944, + "epoch": 0.23, + "learning_rate": 0.00027200000000000005, + "loss": 6.9545, "step": 139 }, { - "epoch": 0.12, - "learning_rate": 0.000266, - "loss": 7.1287, + "epoch": 0.24, + "learning_rate": 0.00027400000000000005, + "loss": 6.993, "step": 140 }, { - "epoch": 0.12, - "learning_rate": 0.000268, - "loss": 7.0394, + "epoch": 0.24, + "learning_rate": 0.00027600000000000004, + "loss": 7.0001, "step": 141 }, { - "epoch": 0.12, - "learning_rate": 0.00027, - "loss": 6.9535, + "epoch": 0.24, + "learning_rate": 0.00027800000000000004, + "loss": 7.1251, "step": 142 }, { - "epoch": 0.12, - "learning_rate": 0.00027200000000000005, - "loss": 6.9182, + "epoch": 0.24, + "learning_rate": 0.00028000000000000003, + "loss": 7.1047, "step": 143 }, { - "epoch": 0.12, - "learning_rate": 0.00027400000000000005, - "loss": 7.0463, + "epoch": 0.24, + "learning_rate": 0.00028199999999999997, + "loss": 6.7781, "step": 144 }, { - "epoch": 0.12, - "learning_rate": 0.00027600000000000004, - "loss": 6.8786, + "epoch": 0.24, + "learning_rate": 0.00028399999999999996, + "loss": 6.8235, "step": 145 }, { - "epoch": 0.12, - "learning_rate": 0.00027800000000000004, - "loss": 7.0804, + "epoch": 0.25, + "learning_rate": 0.00028599999999999996, + "loss": 7.1905, "step": 146 }, { - "epoch": 0.12, - "learning_rate": 0.00028000000000000003, - "loss": 6.7952, + "epoch": 0.25, + "learning_rate": 0.000288, + "loss": 6.8899, "step": 147 }, { - "epoch": 0.12, - "learning_rate": 0.00028199999999999997, - "loss": 6.5721, + "epoch": 0.25, + "learning_rate": 0.00029, + "loss": 6.7851, "step": 148 }, { - "epoch": 0.13, - "learning_rate": 0.00028399999999999996, - "loss": 6.7277, + "epoch": 0.25, + "learning_rate": 0.000292, + "loss": 6.8372, "step": 149 }, { - "epoch": 0.13, - "learning_rate": 0.00028599999999999996, - "loss": 6.6187, + "epoch": 0.25, + "learning_rate": 0.000294, + "loss": 6.4942, "step": 150 }, { - "epoch": 0.13, - "learning_rate": 0.000288, - "loss": 7.3634, + "epoch": 0.25, + "learning_rate": 0.000296, + "loss": 7.1939, "step": 151 }, { - "epoch": 0.13, - "learning_rate": 0.00029, - "loss": 7.408, + "epoch": 0.26, + "learning_rate": 0.000298, + "loss": 7.1799, "step": 152 }, { - "epoch": 0.13, - "learning_rate": 0.000292, - "loss": 7.1431, + "epoch": 0.26, + "learning_rate": 0.0003, + "loss": 7.1226, "step": 153 }, { - "epoch": 0.13, - "learning_rate": 0.000294, - "loss": 7.1948, + "epoch": 0.26, + "learning_rate": 0.000302, + "loss": 6.963, "step": 154 }, { - "epoch": 0.13, - "learning_rate": 0.000296, - "loss": 7.0712, + "epoch": 0.26, + "learning_rate": 0.000304, + "loss": 7.0494, "step": 155 }, { - "epoch": 0.13, - "learning_rate": 0.000298, - "loss": 7.0385, + "epoch": 0.26, + "learning_rate": 0.000306, + "loss": 7.0654, "step": 156 }, { - "epoch": 0.13, - "learning_rate": 0.0003, - "loss": 7.083, + "epoch": 0.26, + "learning_rate": 0.000308, + "loss": 7.0491, "step": 157 }, { - "epoch": 0.13, - "learning_rate": 0.000302, - "loss": 7.2521, + "epoch": 0.27, + "learning_rate": 0.00031, + "loss": 7.0751, "step": 158 }, { - "epoch": 0.13, - "learning_rate": 0.000304, - "loss": 6.987, + "epoch": 0.27, + "learning_rate": 0.000312, + "loss": 6.9384, "step": 159 }, { - "epoch": 0.13, - "learning_rate": 0.000306, - "loss": 6.951, + "epoch": 0.27, + "learning_rate": 0.000314, + "loss": 7.1189, "step": 160 }, { - "epoch": 0.14, - "learning_rate": 0.000308, - "loss": 7.0044, + "epoch": 0.27, + "learning_rate": 0.000316, + "loss": 6.9326, "step": 161 }, { - "epoch": 0.14, - "learning_rate": 0.00031, - "loss": 7.151, + "epoch": 0.27, + "learning_rate": 0.00031800000000000003, + "loss": 6.9468, "step": 162 }, { - "epoch": 0.14, - "learning_rate": 0.000312, - "loss": 7.2072, + "epoch": 0.27, + "learning_rate": 0.00032, + "loss": 7.0409, "step": 163 }, { - "epoch": 0.14, - "learning_rate": 0.000314, - "loss": 7.05, + "epoch": 0.28, + "learning_rate": 0.000322, + "loss": 7.0507, "step": 164 }, { - "epoch": 0.14, - "learning_rate": 0.000316, - "loss": 7.0243, + "epoch": 0.28, + "learning_rate": 0.000324, + "loss": 6.9408, "step": 165 }, { - "epoch": 0.14, - "learning_rate": 0.00031800000000000003, - "loss": 6.8716, + "epoch": 0.28, + "learning_rate": 0.000326, + "loss": 7.1385, "step": 166 }, { - "epoch": 0.14, - "learning_rate": 0.00032, - "loss": 7.0433, + "epoch": 0.28, + "learning_rate": 0.000328, + "loss": 7.0965, "step": 167 }, { - "epoch": 0.14, - "learning_rate": 0.000322, - "loss": 7.0543, + "epoch": 0.28, + "learning_rate": 0.00033, + "loss": 7.0572, "step": 168 }, { - "epoch": 0.14, - "learning_rate": 0.000324, - "loss": 7.0754, + "epoch": 0.28, + "learning_rate": 0.00033200000000000005, + "loss": 6.9633, "step": 169 }, { - "epoch": 0.14, - "learning_rate": 0.000326, - "loss": 7.0035, + "epoch": 0.29, + "learning_rate": 0.00033400000000000004, + "loss": 7.0541, "step": 170 }, { - "epoch": 0.14, - "learning_rate": 0.000328, - "loss": 7.0058, - "step": 171 + "epoch": 0.29, + "learning_rate": 0.00033600000000000004, + "loss": 6.9672, + "step": 171 }, { - "epoch": 0.14, - "learning_rate": 0.00033, - "loss": 7.03, + "epoch": 0.29, + "learning_rate": 0.00033800000000000003, + "loss": 6.8879, "step": 172 }, { - "epoch": 0.15, - "learning_rate": 0.00033200000000000005, - "loss": 6.8019, + "epoch": 0.29, + "learning_rate": 0.00034, + "loss": 6.9559, "step": 173 }, { - "epoch": 0.15, - "learning_rate": 0.00033400000000000004, - "loss": 7.0817, + "epoch": 0.29, + "learning_rate": 0.000342, + "loss": 6.8725, "step": 174 }, { - "epoch": 0.15, - "learning_rate": 0.00033600000000000004, - "loss": 7.0984, + "epoch": 0.29, + "learning_rate": 0.00034399999999999996, + "loss": 6.9745, "step": 175 }, { - "epoch": 0.15, - "learning_rate": 0.00033800000000000003, - "loss": 7.2515, + "epoch": 0.3, + "learning_rate": 0.000346, + "loss": 7.0163, "step": 176 }, { - "epoch": 0.15, - "learning_rate": 0.00034, - "loss": 7.1763, + "epoch": 0.3, + "learning_rate": 0.000348, + "loss": 7.0641, "step": 177 }, { - "epoch": 0.15, - "learning_rate": 0.000342, - "loss": 7.1449, + "epoch": 0.3, + "learning_rate": 0.00035, + "loss": 6.9334, "step": 178 }, { - "epoch": 0.15, - "learning_rate": 0.00034399999999999996, - "loss": 7.0247, + "epoch": 0.3, + "learning_rate": 0.000352, + "loss": 7.0079, "step": 179 }, { - "epoch": 0.15, - "learning_rate": 0.000346, - "loss": 7.0918, + "epoch": 0.3, + "learning_rate": 0.000354, + "loss": 7.0947, "step": 180 }, { - "epoch": 0.15, - "learning_rate": 0.000348, - "loss": 7.1613, + "epoch": 0.3, + "learning_rate": 0.000356, + "loss": 7.0143, "step": 181 }, { - "epoch": 0.15, - "learning_rate": 0.00035, - "loss": 7.2377, + "epoch": 0.31, + "learning_rate": 0.000358, + "loss": 7.087, "step": 182 }, { - "epoch": 0.15, - "learning_rate": 0.000352, - "loss": 6.9927, + "epoch": 0.31, + "learning_rate": 0.00035999999999999997, + "loss": 7.0131, "step": 183 }, { - "epoch": 0.15, - "learning_rate": 0.000354, - "loss": 7.0281, + "epoch": 0.31, + "learning_rate": 0.000362, + "loss": 6.8973, "step": 184 }, { - "epoch": 0.16, - "learning_rate": 0.000356, - "loss": 6.8501, + "epoch": 0.31, + "learning_rate": 0.000364, + "loss": 7.1537, "step": 185 }, { - "epoch": 0.16, - "learning_rate": 0.000358, - "loss": 6.9226, + "epoch": 0.31, + "learning_rate": 0.000366, + "loss": 6.9218, "step": 186 }, { - "epoch": 0.16, - "learning_rate": 0.00035999999999999997, - "loss": 7.1811, + "epoch": 0.31, + "learning_rate": 0.000368, + "loss": 7.0113, "step": 187 }, { - "epoch": 0.16, - "learning_rate": 0.000362, - "loss": 6.9784, + "epoch": 0.32, + "learning_rate": 0.00037, + "loss": 6.9827, "step": 188 }, { - "epoch": 0.16, - "learning_rate": 0.000364, - "loss": 7.0126, + "epoch": 0.32, + "learning_rate": 0.000372, + "loss": 7.0805, "step": 189 }, { - "epoch": 0.16, - "learning_rate": 0.000366, - "loss": 6.9648, + "epoch": 0.32, + "learning_rate": 0.000374, + "loss": 7.0587, "step": 190 }, { - "epoch": 0.16, - "learning_rate": 0.000368, - "loss": 7.178, + "epoch": 0.32, + "learning_rate": 0.00037600000000000003, + "loss": 7.0688, "step": 191 }, { - "epoch": 0.16, - "learning_rate": 0.00037, - "loss": 7.1971, + "epoch": 0.32, + "learning_rate": 0.000378, + "loss": 7.0879, "step": 192 }, { - "epoch": 0.16, - "learning_rate": 0.000372, - "loss": 7.0049, + "epoch": 0.32, + "learning_rate": 0.00038, + "loss": 6.9182, "step": 193 }, { - "epoch": 0.16, - "learning_rate": 0.000374, - "loss": 6.9962, + "epoch": 0.33, + "learning_rate": 0.000382, + "loss": 6.9707, "step": 194 }, { - "epoch": 0.16, - "learning_rate": 0.00037600000000000003, - "loss": 6.9091, + "epoch": 0.33, + "learning_rate": 0.000384, + "loss": 6.9151, "step": 195 }, { - "epoch": 0.16, - "learning_rate": 0.000378, - "loss": 6.7908, + "epoch": 0.33, + "learning_rate": 0.000386, + "loss": 6.7898, "step": 196 }, { - "epoch": 0.17, - "learning_rate": 0.00038, - "loss": 7.199, + "epoch": 0.33, + "learning_rate": 0.000388, + "loss": 6.9734, "step": 197 }, { - "epoch": 0.17, - "learning_rate": 0.000382, - "loss": 6.892, + "epoch": 0.33, + "learning_rate": 0.00039000000000000005, + "loss": 6.9121, "step": 198 }, { - "epoch": 0.17, - "learning_rate": 0.000384, - "loss": 6.8718, + "epoch": 0.33, + "learning_rate": 0.00039200000000000004, + "loss": 6.6485, "step": 199 }, { - "epoch": 0.17, - "learning_rate": 0.000386, - "loss": 6.4145, + "epoch": 0.34, + "learning_rate": 0.00039400000000000004, + "loss": 6.6154, "step": 200 }, { - "epoch": 0.17, - "learning_rate": 0.000388, - "loss": 7.1194, + "epoch": 0.34, + "learning_rate": 0.00039600000000000003, + "loss": 7.0778, "step": 201 }, { - "epoch": 0.17, - "learning_rate": 0.00039000000000000005, - "loss": 7.0583, + "epoch": 0.34, + "learning_rate": 0.000398, + "loss": 7.0853, "step": 202 }, { - "epoch": 0.17, - "learning_rate": 0.00039200000000000004, - "loss": 7.1171, + "epoch": 0.34, + "learning_rate": 0.0004, + "loss": 7.1074, "step": 203 }, { - "epoch": 0.17, - "learning_rate": 0.00039400000000000004, - "loss": 6.9629, + "epoch": 0.34, + "learning_rate": 0.000402, + "loss": 6.8628, "step": 204 }, { - "epoch": 0.17, - "learning_rate": 0.00039600000000000003, - "loss": 6.98, + "epoch": 0.34, + "learning_rate": 0.000404, + "loss": 6.9699, "step": 205 }, { - "epoch": 0.17, - "learning_rate": 0.000398, - "loss": 7.0801, + "epoch": 0.35, + "learning_rate": 0.00040600000000000006, + "loss": 6.9795, "step": 206 }, { - "epoch": 0.17, - "learning_rate": 0.0004, - "loss": 7.1003, + "epoch": 0.35, + "learning_rate": 0.000408, + "loss": 7.0912, "step": 207 }, { - "epoch": 0.17, - "learning_rate": 0.000402, - "loss": 7.2571, + "epoch": 0.35, + "learning_rate": 0.00041, + "loss": 7.0847, "step": 208 }, { - "epoch": 0.18, - "learning_rate": 0.000404, - "loss": 7.0932, + "epoch": 0.35, + "learning_rate": 0.000412, + "loss": 7.0106, "step": 209 }, { - "epoch": 0.18, - "learning_rate": 0.00040600000000000006, - "loss": 7.2553, + "epoch": 0.35, + "learning_rate": 0.000414, + "loss": 7.1056, "step": 210 }, { - "epoch": 0.18, - "learning_rate": 0.000408, - "loss": 7.0012, + "epoch": 0.35, + "learning_rate": 0.000416, + "loss": 7.0423, "step": 211 }, { - "epoch": 0.18, - "learning_rate": 0.00041, - "loss": 6.9778, + "epoch": 0.36, + "learning_rate": 0.00041799999999999997, + "loss": 7.0226, "step": 212 }, { - "epoch": 0.18, - "learning_rate": 0.000412, - "loss": 7.0422, + "epoch": 0.36, + "learning_rate": 0.00042, + "loss": 7.0216, "step": 213 }, { - "epoch": 0.18, - "learning_rate": 0.000414, - "loss": 7.0387, + "epoch": 0.36, + "learning_rate": 0.000422, + "loss": 7.0556, "step": 214 }, { - "epoch": 0.18, - "learning_rate": 0.000416, - "loss": 7.0497, + "epoch": 0.36, + "learning_rate": 0.000424, + "loss": 7.0019, "step": 215 }, { - "epoch": 0.18, - "learning_rate": 0.00041799999999999997, - "loss": 6.9891, + "epoch": 0.36, + "learning_rate": 0.000426, + "loss": 7.0848, "step": 216 }, { - "epoch": 0.18, - "learning_rate": 0.00042, - "loss": 7.0476, + "epoch": 0.36, + "learning_rate": 0.000428, + "loss": 6.8998, "step": 217 }, { - "epoch": 0.18, - "learning_rate": 0.000422, - "loss": 7.0848, + "epoch": 0.37, + "learning_rate": 0.00043, + "loss": 6.9194, "step": 218 }, { - "epoch": 0.18, - "learning_rate": 0.000424, - "loss": 6.9996, + "epoch": 0.37, + "learning_rate": 0.000432, + "loss": 6.9024, "step": 219 }, { - "epoch": 0.18, - "learning_rate": 0.000426, - "loss": 7.1819, + "epoch": 0.37, + "learning_rate": 0.00043400000000000003, + "loss": 7.1083, "step": 220 }, { - "epoch": 0.19, - "learning_rate": 0.000428, - "loss": 6.9997, + "epoch": 0.37, + "learning_rate": 0.000436, + "loss": 6.9852, "step": 221 }, { - "epoch": 0.19, - "learning_rate": 0.00043, - "loss": 7.3022, + "epoch": 0.37, + "learning_rate": 0.000438, + "loss": 7.0352, "step": 222 }, { - "epoch": 0.19, - "learning_rate": 0.000432, - "loss": 7.191, + "epoch": 0.37, + "learning_rate": 0.00044, + "loss": 7.0216, "step": 223 }, { - "epoch": 0.19, - "learning_rate": 0.00043400000000000003, - "loss": 7.1603, + "epoch": 0.38, + "learning_rate": 0.000442, + "loss": 7.0737, "step": 224 }, { - "epoch": 0.19, - "learning_rate": 0.000436, - "loss": 7.0109, + "epoch": 0.38, + "learning_rate": 0.000444, + "loss": 7.0748, "step": 225 }, { - "epoch": 0.19, - "learning_rate": 0.000438, - "loss": 6.8553, + "epoch": 0.38, + "learning_rate": 0.000446, + "loss": 6.9234, "step": 226 }, { - "epoch": 0.19, - "learning_rate": 0.00044, - "loss": 7.1697, + "epoch": 0.38, + "learning_rate": 0.000448, + "loss": 6.9104, "step": 227 }, { - "epoch": 0.19, - "learning_rate": 0.000442, - "loss": 7.0937, + "epoch": 0.38, + "learning_rate": 0.00045000000000000004, + "loss": 7.0043, "step": 228 }, { - "epoch": 0.19, - "learning_rate": 0.000444, - "loss": 7.0137, + "epoch": 0.39, + "learning_rate": 0.00045200000000000004, + "loss": 7.0571, "step": 229 }, { - "epoch": 0.19, - "learning_rate": 0.000446, - "loss": 7.0811, + "epoch": 0.39, + "learning_rate": 0.00045400000000000003, + "loss": 7.0416, "step": 230 }, { - "epoch": 0.19, - "learning_rate": 0.000448, - "loss": 7.1647, + "epoch": 0.39, + "learning_rate": 0.000456, + "loss": 7.0239, "step": 231 }, { - "epoch": 0.2, - "learning_rate": 0.00045000000000000004, - "loss": 7.071, + "epoch": 0.39, + "learning_rate": 0.000458, + "loss": 6.9304, "step": 232 }, { - "epoch": 0.2, - "learning_rate": 0.00045200000000000004, - "loss": 7.0748, + "epoch": 0.39, + "learning_rate": 0.00046, + "loss": 7.0238, "step": 233 }, { - "epoch": 0.2, - "learning_rate": 0.00045400000000000003, - "loss": 6.9141, + "epoch": 0.39, + "learning_rate": 0.000462, + "loss": 7.0501, "step": 234 }, { - "epoch": 0.2, - "learning_rate": 0.000456, - "loss": 7.139, + "epoch": 0.4, + "learning_rate": 0.00046400000000000006, + "loss": 7.0763, "step": 235 }, { - "epoch": 0.2, - "learning_rate": 0.000458, - "loss": 7.1141, + "epoch": 0.4, + "learning_rate": 0.00046600000000000005, + "loss": 6.893, "step": 236 }, { - "epoch": 0.2, - "learning_rate": 0.00046, - "loss": 7.0931, + "epoch": 0.4, + "learning_rate": 0.00046800000000000005, + "loss": 6.9541, "step": 237 }, { - "epoch": 0.2, - "learning_rate": 0.000462, - "loss": 7.0072, + "epoch": 0.4, + "learning_rate": 0.00047, + "loss": 6.9877, "step": 238 }, { - "epoch": 0.2, - "learning_rate": 0.00046400000000000006, - "loss": 7.0934, + "epoch": 0.4, + "learning_rate": 0.000472, + "loss": 7.0107, "step": 239 }, { - "epoch": 0.2, - "learning_rate": 0.00046600000000000005, - "loss": 6.9072, + "epoch": 0.4, + "learning_rate": 0.000474, + "loss": 7.0396, "step": 240 }, { - "epoch": 0.2, - "learning_rate": 0.00046800000000000005, - "loss": 7.0331, + "epoch": 0.41, + "learning_rate": 0.00047599999999999997, + "loss": 7.1206, "step": 241 }, { - "epoch": 0.2, - "learning_rate": 0.00047, - "loss": 7.0503, + "epoch": 0.41, + "learning_rate": 0.00047799999999999996, + "loss": 7.0092, "step": 242 }, { - "epoch": 0.2, - "learning_rate": 0.000472, - "loss": 7.1425, + "epoch": 0.41, + "learning_rate": 0.00048, + "loss": 7.0429, "step": 243 }, { - "epoch": 0.21, - "learning_rate": 0.000474, - "loss": 7.1577, + "epoch": 0.41, + "learning_rate": 0.000482, + "loss": 6.9709, "step": 244 }, { - "epoch": 0.21, - "learning_rate": 0.00047599999999999997, - "loss": 6.8863, + "epoch": 0.41, + "learning_rate": 0.000484, + "loss": 6.8753, "step": 245 }, { - "epoch": 0.21, - "learning_rate": 0.00047799999999999996, - "loss": 6.844, + "epoch": 0.41, + "learning_rate": 0.000486, + "loss": 6.8831, "step": 246 }, { - "epoch": 0.21, - "learning_rate": 0.00048, - "loss": 7.4113, + "epoch": 0.42, + "learning_rate": 0.000488, + "loss": 6.8629, "step": 247 }, { - "epoch": 0.21, - "learning_rate": 0.000482, - "loss": 6.7754, + "epoch": 0.42, + "learning_rate": 0.00049, + "loss": 6.905, "step": 248 }, { - "epoch": 0.21, - "learning_rate": 0.000484, - "loss": 6.6743, + "epoch": 0.42, + "learning_rate": 0.000492, + "loss": 6.7441, "step": 249 }, { - "epoch": 0.21, - "learning_rate": 0.000486, - "loss": 6.4596, + "epoch": 0.42, + "learning_rate": 0.000494, + "loss": 6.5621, "step": 250 }, { - "epoch": 0.21, - "learning_rate": 0.000488, - "loss": 7.2881, + "epoch": 0.42, + "learning_rate": 0.000496, + "loss": 7.2098, "step": 251 }, { - "epoch": 0.21, - "learning_rate": 0.00049, - "loss": 7.0383, + "epoch": 0.42, + "learning_rate": 0.000498, + "loss": 7.2084, "step": 252 }, { - "epoch": 0.21, - "learning_rate": 0.000492, - "loss": 7.2685, + "epoch": 0.43, + "learning_rate": 0.0005, + "loss": 7.0214, "step": 253 }, { - "epoch": 0.21, - "learning_rate": 0.000494, - "loss": 6.9829, + "epoch": 0.43, + "learning_rate": 0.0005020000000000001, + "loss": 7.0158, "step": 254 }, { - "epoch": 0.21, - "learning_rate": 0.000496, - "loss": 7.0622, + "epoch": 0.43, + "learning_rate": 0.000504, + "loss": 7.1246, "step": 255 }, { - "epoch": 0.22, - "learning_rate": 0.000498, - "loss": 7.1413, + "epoch": 0.43, + "learning_rate": 0.000506, + "loss": 7.0502, "step": 256 }, { - "epoch": 0.22, - "learning_rate": 0.0005, - "loss": 7.0308, + "epoch": 0.43, + "learning_rate": 0.000508, + "loss": 7.0624, "step": 257 }, { - "epoch": 0.22, - "learning_rate": 0.0005020000000000001, - "loss": 7.183, + "epoch": 0.43, + "learning_rate": 0.00051, + "loss": 6.9986, "step": 258 }, { - "epoch": 0.22, - "learning_rate": 0.000504, - "loss": 7.0245, + "epoch": 0.44, + "learning_rate": 0.000512, + "loss": 7.1072, "step": 259 }, { - "epoch": 0.22, - "learning_rate": 0.000506, - "loss": 6.9604, + "epoch": 0.44, + "learning_rate": 0.000514, + "loss": 7.0238, "step": 260 }, { - "epoch": 0.22, - "learning_rate": 0.000508, - "loss": 7.0466, + "epoch": 0.44, + "learning_rate": 0.0005160000000000001, + "loss": 7.0155, "step": 261 }, { - "epoch": 0.22, - "learning_rate": 0.00051, - "loss": 7.0001, + "epoch": 0.44, + "learning_rate": 0.000518, + "loss": 6.9996, "step": 262 }, { - "epoch": 0.22, - "learning_rate": 0.000512, - "loss": 6.9583, + "epoch": 0.44, + "learning_rate": 0.0005200000000000001, + "loss": 7.0465, "step": 263 }, { - "epoch": 0.22, - "learning_rate": 0.000514, - "loss": 7.0768, + "epoch": 0.44, + "learning_rate": 0.000522, + "loss": 6.8355, "step": 264 }, { - "epoch": 0.22, - "learning_rate": 0.0005160000000000001, - "loss": 7.0325, + "epoch": 0.45, + "learning_rate": 0.000524, + "loss": 6.9596, "step": 265 }, { - "epoch": 0.22, - "learning_rate": 0.000518, - "loss": 7.0483, + "epoch": 0.45, + "learning_rate": 0.000526, + "loss": 6.9608, "step": 266 }, { - "epoch": 0.22, - "learning_rate": 0.0005200000000000001, - "loss": 6.9711, + "epoch": 0.45, + "learning_rate": 0.000528, + "loss": 7.0486, "step": 267 }, { - "epoch": 0.23, - "learning_rate": 0.000522, - "loss": 6.9877, + "epoch": 0.45, + "learning_rate": 0.0005300000000000001, + "loss": 6.9112, "step": 268 }, { - "epoch": 0.23, - "learning_rate": 0.000524, - "loss": 7.0728, + "epoch": 0.45, + "learning_rate": 0.000532, + "loss": 7.0594, "step": 269 }, { - "epoch": 0.23, - "learning_rate": 0.000526, - "loss": 7.0505, + "epoch": 0.45, + "learning_rate": 0.0005340000000000001, + "loss": 6.9925, "step": 270 }, { - "epoch": 0.23, - "learning_rate": 0.000528, - "loss": 7.1279, + "epoch": 0.46, + "learning_rate": 0.000536, + "loss": 7.0339, "step": 271 }, { - "epoch": 0.23, - "learning_rate": 0.0005300000000000001, - "loss": 6.9928, + "epoch": 0.46, + "learning_rate": 0.0005380000000000001, + "loss": 7.0785, "step": 272 }, { - "epoch": 0.23, - "learning_rate": 0.000532, - "loss": 7.0933, + "epoch": 0.46, + "learning_rate": 0.00054, + "loss": 7.0008, "step": 273 }, { - "epoch": 0.23, - "learning_rate": 0.0005340000000000001, - "loss": 7.1199, + "epoch": 0.46, + "learning_rate": 0.0005420000000000001, + "loss": 6.976, "step": 274 }, { - "epoch": 0.23, - "learning_rate": 0.000536, - "loss": 7.1138, + "epoch": 0.46, + "learning_rate": 0.0005440000000000001, + "loss": 7.0215, "step": 275 }, { - "epoch": 0.23, - "learning_rate": 0.0005380000000000001, - "loss": 6.9262, + "epoch": 0.46, + "learning_rate": 0.000546, + "loss": 7.0251, "step": 276 }, { - "epoch": 0.23, - "learning_rate": 0.00054, - "loss": 7.0241, + "epoch": 0.47, + "learning_rate": 0.0005480000000000001, + "loss": 7.0722, "step": 277 }, { - "epoch": 0.23, - "learning_rate": 0.0005420000000000001, - "loss": 7.0357, + "epoch": 0.47, + "learning_rate": 0.00055, + "loss": 7.0051, "step": 278 }, { - "epoch": 0.23, - "learning_rate": 0.0005440000000000001, - "loss": 7.1868, + "epoch": 0.47, + "learning_rate": 0.0005520000000000001, + "loss": 7.0019, "step": 279 }, { - "epoch": 0.24, - "learning_rate": 0.000546, - "loss": 6.9221, + "epoch": 0.47, + "learning_rate": 0.000554, + "loss": 7.0538, "step": 280 }, { - "epoch": 0.24, - "learning_rate": 0.0005480000000000001, - "loss": 6.9587, + "epoch": 0.47, + "learning_rate": 0.0005560000000000001, + "loss": 7.0292, "step": 281 }, { - "epoch": 0.24, - "learning_rate": 0.00055, - "loss": 7.015, + "epoch": 0.47, + "learning_rate": 0.000558, + "loss": 7.0564, "step": 282 }, { - "epoch": 0.24, - "learning_rate": 0.0005520000000000001, - "loss": 7.2622, + "epoch": 0.48, + "learning_rate": 0.0005600000000000001, + "loss": 6.8884, "step": 283 }, { - "epoch": 0.24, - "learning_rate": 0.000554, - "loss": 7.1459, + "epoch": 0.48, + "learning_rate": 0.0005620000000000001, + "loss": 7.0051, "step": 284 }, { - "epoch": 0.24, - "learning_rate": 0.0005560000000000001, - "loss": 7.072, + "epoch": 0.48, + "learning_rate": 0.0005639999999999999, + "loss": 7.0457, "step": 285 }, { - "epoch": 0.24, - "learning_rate": 0.000558, - "loss": 7.0269, + "epoch": 0.48, + "learning_rate": 0.000566, + "loss": 6.9338, "step": 286 }, { - "epoch": 0.24, - "learning_rate": 0.0005600000000000001, - "loss": 6.9213, + "epoch": 0.48, + "learning_rate": 0.0005679999999999999, + "loss": 7.1379, "step": 287 }, { - "epoch": 0.24, - "learning_rate": 0.0005620000000000001, - "loss": 6.9281, + "epoch": 0.48, + "learning_rate": 0.00057, + "loss": 6.8807, "step": 288 }, { - "epoch": 0.24, - "learning_rate": 0.0005639999999999999, - "loss": 7.0964, + "epoch": 0.49, + "learning_rate": 0.0005719999999999999, + "loss": 6.9174, "step": 289 }, { - "epoch": 0.24, - "learning_rate": 0.000566, - "loss": 6.9473, + "epoch": 0.49, + "learning_rate": 0.000574, + "loss": 7.0579, "step": 290 }, { - "epoch": 0.24, - "learning_rate": 0.0005679999999999999, - "loss": 7.2092, + "epoch": 0.49, + "learning_rate": 0.000576, + "loss": 7.0882, "step": 291 }, { - "epoch": 0.25, - "learning_rate": 0.00057, - "loss": 7.1037, + "epoch": 0.49, + "learning_rate": 0.000578, + "loss": 6.9218, "step": 292 }, { - "epoch": 0.25, - "learning_rate": 0.0005719999999999999, - "loss": 6.7986, + "epoch": 0.49, + "learning_rate": 0.00058, + "loss": 6.9869, "step": 293 }, { - "epoch": 0.25, - "learning_rate": 0.000574, - "loss": 6.8426, + "epoch": 0.49, + "learning_rate": 0.0005819999999999999, + "loss": 7.0547, "step": 294 }, { - "epoch": 0.25, - "learning_rate": 0.000576, - "loss": 7.0094, + "epoch": 0.5, + "learning_rate": 0.000584, + "loss": 6.8407, "step": 295 }, { - "epoch": 0.25, - "learning_rate": 0.000578, - "loss": 7.1792, + "epoch": 0.5, + "learning_rate": 0.0005859999999999999, + "loss": 6.8458, "step": 296 }, { - "epoch": 0.25, - "learning_rate": 0.00058, - "loss": 6.8114, + "epoch": 0.5, + "learning_rate": 0.000588, + "loss": 6.7859, "step": 297 }, { - "epoch": 0.25, - "learning_rate": 0.0005819999999999999, - "loss": 6.8439, + "epoch": 0.5, + "learning_rate": 0.00059, + "loss": 6.8682, "step": 298 }, { - "epoch": 0.25, - "learning_rate": 0.000584, - "loss": 6.9772, + "epoch": 0.5, + "learning_rate": 0.000592, + "loss": 6.6934, "step": 299 }, { - "epoch": 0.25, - "learning_rate": 0.0005859999999999999, - "loss": 6.5952, + "epoch": 0.5, + "learning_rate": 0.000594, + "loss": 6.5845, "step": 300 }, { - "epoch": 0.25, - "learning_rate": 0.000588, - "loss": 7.1037, + "epoch": 0.51, + "learning_rate": 0.000596, + "loss": 7.236, "step": 301 }, { - "epoch": 0.25, - "learning_rate": 0.00059, - "loss": 7.229, + "epoch": 0.51, + "learning_rate": 0.000598, + "loss": 7.2218, "step": 302 }, { - "epoch": 0.25, - "learning_rate": 0.000592, - "loss": 7.2436, + "epoch": 0.51, + "learning_rate": 0.0006, + "loss": 7.1656, "step": 303 }, { - "epoch": 0.26, - "learning_rate": 0.000594, - "loss": 6.9555, + "epoch": 0.51, + "learning_rate": 0.000602, + "loss": 7.0663, "step": 304 }, { - "epoch": 0.26, - "learning_rate": 0.000596, - "loss": 7.0729, + "epoch": 0.51, + "learning_rate": 0.000604, + "loss": 7.1259, "step": 305 }, { - "epoch": 0.26, - "learning_rate": 0.000598, - "loss": 7.0202, + "epoch": 0.51, + "learning_rate": 0.000606, + "loss": 7.1706, "step": 306 }, { - "epoch": 0.26, - "learning_rate": 0.0006, - "loss": 7.1313, + "epoch": 0.52, + "learning_rate": 0.000608, + "loss": 7.0302, "step": 307 }, { - "epoch": 0.26, - "learning_rate": 0.000602, - "loss": 6.9928, + "epoch": 0.52, + "learning_rate": 0.00061, + "loss": 7.132, "step": 308 }, { - "epoch": 0.26, - "learning_rate": 0.000604, - "loss": 6.8953, + "epoch": 0.52, + "learning_rate": 0.000612, + "loss": 7.0026, "step": 309 }, { - "epoch": 0.26, - "learning_rate": 0.000606, - "loss": 7.1579, + "epoch": 0.52, + "learning_rate": 0.000614, + "loss": 7.0629, "step": 310 }, { - "epoch": 0.26, - "learning_rate": 0.000608, - "loss": 6.8321, + "epoch": 0.52, + "learning_rate": 0.000616, + "loss": 6.9223, "step": 311 }, { - "epoch": 0.26, - "learning_rate": 0.00061, - "loss": 6.9062, + "epoch": 0.52, + "learning_rate": 0.0006180000000000001, + "loss": 7.16, "step": 312 }, { - "epoch": 0.26, - "learning_rate": 0.000612, - "loss": 7.0055, + "epoch": 0.53, + "learning_rate": 0.00062, + "loss": 6.9756, "step": 313 }, { - "epoch": 0.26, - "learning_rate": 0.000614, - "loss": 7.0492, + "epoch": 0.53, + "learning_rate": 0.000622, + "loss": 6.9914, "step": 314 }, { - "epoch": 0.26, - "learning_rate": 0.000616, - "loss": 6.9698, + "epoch": 0.53, + "learning_rate": 0.000624, + "loss": 7.018, "step": 315 }, { - "epoch": 0.27, - "learning_rate": 0.0006180000000000001, - "loss": 7.1945, + "epoch": 0.53, + "learning_rate": 0.000626, + "loss": 6.948, "step": 316 }, { - "epoch": 0.27, - "learning_rate": 0.00062, - "loss": 7.0507, + "epoch": 0.53, + "learning_rate": 0.000628, + "loss": 6.9204, "step": 317 }, { - "epoch": 0.27, - "learning_rate": 0.000622, - "loss": 7.07, + "epoch": 0.53, + "learning_rate": 0.00063, + "loss": 7.1444, "step": 318 }, { - "epoch": 0.27, - "learning_rate": 0.000624, - "loss": 6.9557, + "epoch": 0.54, + "learning_rate": 0.000632, + "loss": 7.0219, "step": 319 }, { - "epoch": 0.27, - "learning_rate": 0.000626, - "loss": 6.956, + "epoch": 0.54, + "learning_rate": 0.000634, + "loss": 7.0609, "step": 320 }, { - "epoch": 0.27, - "learning_rate": 0.000628, - "loss": 6.9846, + "epoch": 0.54, + "learning_rate": 0.0006360000000000001, + "loss": 6.9684, "step": 321 }, { - "epoch": 0.27, - "learning_rate": 0.00063, - "loss": 6.9683, + "epoch": 0.54, + "learning_rate": 0.000638, + "loss": 6.9689, "step": 322 }, { - "epoch": 0.27, - "learning_rate": 0.000632, - "loss": 6.9356, + "epoch": 0.54, + "learning_rate": 0.00064, + "loss": 6.9515, "step": 323 }, { - "epoch": 0.27, - "learning_rate": 0.000634, - "loss": 7.0013, + "epoch": 0.54, + "learning_rate": 0.000642, + "loss": 6.9158, "step": 324 }, { - "epoch": 0.27, - "learning_rate": 0.0006360000000000001, - "loss": 6.8627, + "epoch": 0.55, + "learning_rate": 0.000644, + "loss": 7.1327, "step": 325 }, { - "epoch": 0.27, - "learning_rate": 0.000638, - "loss": 7.0897, + "epoch": 0.55, + "learning_rate": 0.000646, + "loss": 7.0171, "step": 326 }, { - "epoch": 0.27, - "learning_rate": 0.00064, - "loss": 7.0129, + "epoch": 0.55, + "learning_rate": 0.000648, + "loss": 7.1963, "step": 327 }, { - "epoch": 0.28, - "learning_rate": 0.000642, - "loss": 7.0737, + "epoch": 0.55, + "learning_rate": 0.0006500000000000001, + "loss": 7.0119, "step": 328 }, { - "epoch": 0.28, - "learning_rate": 0.000644, - "loss": 6.9392, + "epoch": 0.55, + "learning_rate": 0.000652, + "loss": 6.9315, "step": 329 }, { - "epoch": 0.28, - "learning_rate": 0.000646, - "loss": 7.1556, + "epoch": 0.55, + "learning_rate": 0.0006540000000000001, + "loss": 7.0296, "step": 330 }, { - "epoch": 0.28, - "learning_rate": 0.000648, - "loss": 7.0289, + "epoch": 0.56, + "learning_rate": 0.000656, + "loss": 6.9909, "step": 331 }, { - "epoch": 0.28, - "learning_rate": 0.0006500000000000001, - "loss": 7.0916, + "epoch": 0.56, + "learning_rate": 0.0006580000000000001, + "loss": 6.9743, "step": 332 }, { - "epoch": 0.28, - "learning_rate": 0.000652, - "loss": 7.1824, + "epoch": 0.56, + "learning_rate": 0.00066, + "loss": 6.9316, "step": 333 }, { - "epoch": 0.28, - "learning_rate": 0.0006540000000000001, - "loss": 6.9691, + "epoch": 0.56, + "learning_rate": 0.000662, + "loss": 7.0359, "step": 334 }, { - "epoch": 0.28, - "learning_rate": 0.000656, - "loss": 7.0728, + "epoch": 0.56, + "learning_rate": 0.0006640000000000001, + "loss": 7.0403, "step": 335 }, { - "epoch": 0.28, - "learning_rate": 0.0006580000000000001, - "loss": 7.121, + "epoch": 0.56, + "learning_rate": 0.000666, + "loss": 6.9493, "step": 336 }, { - "epoch": 0.28, - "learning_rate": 0.00066, - "loss": 6.8791, + "epoch": 0.57, + "learning_rate": 0.0006680000000000001, + "loss": 6.991, "step": 337 }, { - "epoch": 0.28, - "learning_rate": 0.000662, - "loss": 7.1717, + "epoch": 0.57, + "learning_rate": 0.00067, + "loss": 6.9762, "step": 338 }, { - "epoch": 0.28, - "learning_rate": 0.0006640000000000001, - "loss": 6.9594, + "epoch": 0.57, + "learning_rate": 0.0006720000000000001, + "loss": 7.0749, "step": 339 }, { - "epoch": 0.29, - "learning_rate": 0.000666, - "loss": 7.0685, + "epoch": 0.57, + "learning_rate": 0.000674, + "loss": 7.1129, "step": 340 }, { - "epoch": 0.29, - "learning_rate": 0.0006680000000000001, - "loss": 6.9809, + "epoch": 0.57, + "learning_rate": 0.0006760000000000001, + "loss": 6.9891, "step": 341 }, { - "epoch": 0.29, - "learning_rate": 0.00067, - "loss": 7.1153, + "epoch": 0.58, + "learning_rate": 0.0006780000000000001, + "loss": 6.9923, "step": 342 }, { - "epoch": 0.29, - "learning_rate": 0.0006720000000000001, - "loss": 6.9467, + "epoch": 0.58, + "learning_rate": 0.00068, + "loss": 6.8966, "step": 343 }, { - "epoch": 0.29, - "learning_rate": 0.000674, - "loss": 7.0976, + "epoch": 0.58, + "learning_rate": 0.0006820000000000001, + "loss": 7.0609, "step": 344 }, { - "epoch": 0.29, - "learning_rate": 0.0006760000000000001, - "loss": 6.8509, + "epoch": 0.58, + "learning_rate": 0.000684, + "loss": 6.9641, "step": 345 }, { - "epoch": 0.29, - "learning_rate": 0.0006780000000000001, - "loss": 6.8434, + "epoch": 0.58, + "learning_rate": 0.0006860000000000001, + "loss": 6.7934, "step": 346 }, { - "epoch": 0.29, - "learning_rate": 0.00068, - "loss": 7.018, + "epoch": 0.58, + "learning_rate": 0.0006879999999999999, + "loss": 6.814, "step": 347 }, { - "epoch": 0.29, - "learning_rate": 0.0006820000000000001, + "epoch": 0.59, + "learning_rate": 0.00069, "loss": 7.1134, "step": 348 }, { - "epoch": 0.29, - "learning_rate": 0.000684, - "loss": 6.7391, + "epoch": 0.59, + "learning_rate": 0.000692, + "loss": 6.7713, "step": 349 }, { - "epoch": 0.29, - "learning_rate": 0.0006860000000000001, - "loss": 6.9186, + "epoch": 0.59, + "learning_rate": 0.000694, + "loss": 6.7095, "step": 350 }, { - "epoch": 0.3, - "learning_rate": 0.0006879999999999999, - "loss": 7.3938, + "epoch": 0.59, + "learning_rate": 0.000696, + "loss": 7.2116, "step": 351 }, { - "epoch": 0.3, - "learning_rate": 0.00069, - "loss": 7.2696, + "epoch": 0.59, + "learning_rate": 0.0006979999999999999, + "loss": 7.1415, "step": 352 }, { - "epoch": 0.3, - "learning_rate": 0.000692, - "loss": 7.1135, + "epoch": 0.59, + "learning_rate": 0.0007, + "loss": 7.0851, "step": 353 }, { - "epoch": 0.3, - "learning_rate": 0.000694, - "loss": 7.1692, + "epoch": 0.6, + "learning_rate": 0.0007019999999999999, + "loss": 7.0795, "step": 354 }, { - "epoch": 0.3, - "learning_rate": 0.000696, - "loss": 7.0229, + "epoch": 0.6, + "learning_rate": 0.000704, + "loss": 7.0815, "step": 355 }, { - "epoch": 0.3, - "learning_rate": 0.0006979999999999999, - "loss": 7.1764, + "epoch": 0.6, + "learning_rate": 0.0007059999999999999, + "loss": 7.1496, "step": 356 }, { - "epoch": 0.3, - "learning_rate": 0.0007, - "loss": 7.1834, + "epoch": 0.6, + "learning_rate": 0.000708, + "loss": 7.0391, "step": 357 }, { - "epoch": 0.3, - "learning_rate": 0.0007019999999999999, - "loss": 6.9568, + "epoch": 0.6, + "learning_rate": 0.00071, + "loss": 7.0427, "step": 358 }, { - "epoch": 0.3, - "learning_rate": 0.000704, - "loss": 7.0929, + "epoch": 0.6, + "learning_rate": 0.000712, + "loss": 7.006, "step": 359 }, { - "epoch": 0.3, - "learning_rate": 0.0007059999999999999, - "loss": 7.1641, + "epoch": 0.61, + "learning_rate": 0.000714, + "loss": 7.0213, "step": 360 }, { - "epoch": 0.3, - "learning_rate": 0.000708, - "loss": 6.9673, + "epoch": 0.61, + "learning_rate": 0.000716, + "loss": 7.0563, "step": 361 }, { - "epoch": 0.3, - "learning_rate": 0.00071, - "loss": 6.9279, + "epoch": 0.61, + "learning_rate": 0.000718, + "loss": 6.9326, "step": 362 }, { - "epoch": 0.31, - "learning_rate": 0.000712, - "loss": 7.1738, + "epoch": 0.61, + "learning_rate": 0.0007199999999999999, + "loss": 285.9326, "step": 363 }, { - "epoch": 0.31, - "learning_rate": 0.000714, - "loss": 7.1017, + "epoch": 0.61, + "learning_rate": 0.000722, + "loss": 7.0385, "step": 364 }, { - "epoch": 0.31, - "learning_rate": 0.000716, - "loss": 6.9808, + "epoch": 0.61, + "learning_rate": 0.000724, + "loss": 7.1396, "step": 365 }, { - "epoch": 0.31, - "learning_rate": 0.000718, - "loss": 7.0973, + "epoch": 0.62, + "learning_rate": 0.000726, + "loss": 6.8595, "step": 366 }, { - "epoch": 0.31, - "learning_rate": 0.0007199999999999999, - "loss": 7.0794, + "epoch": 0.62, + "learning_rate": 0.000728, + "loss": 7.1262, "step": 367 }, { - "epoch": 0.31, - "learning_rate": 0.000722, - "loss": 7.1846, + "epoch": 0.62, + "learning_rate": 0.00073, + "loss": 6.9156, "step": 368 }, { - "epoch": 0.31, - "learning_rate": 0.000724, - "loss": 7.0808, + "epoch": 0.62, + "learning_rate": 0.000732, + "loss": 6.9526, "step": 369 }, { - "epoch": 0.31, - "learning_rate": 0.000726, - "loss": 7.1264, + "epoch": 0.62, + "learning_rate": 0.000734, + "loss": 7.0985, "step": 370 }, { - "epoch": 0.31, - "learning_rate": 0.000728, - "loss": 6.9534, + "epoch": 0.62, + "learning_rate": 0.000736, + "loss": 6.993, "step": 371 }, { - "epoch": 0.31, - "learning_rate": 0.00073, - "loss": 6.9788, + "epoch": 0.63, + "learning_rate": 0.000738, + "loss": 6.9229, "step": 372 }, { - "epoch": 0.31, - "learning_rate": 0.000732, - "loss": 6.9074, + "epoch": 0.63, + "learning_rate": 0.00074, + "loss": 6.9861, "step": 373 }, { - "epoch": 0.31, - "learning_rate": 0.000734, - "loss": 6.9939, + "epoch": 0.63, + "learning_rate": 0.000742, + "loss": 6.911, "step": 374 }, { - "epoch": 0.32, - "learning_rate": 0.000736, - "loss": 6.9457, + "epoch": 0.63, + "learning_rate": 0.000744, + "loss": 7.0069, "step": 375 }, { - "epoch": 0.32, - "learning_rate": 0.000738, - "loss": 7.111, + "epoch": 0.63, + "learning_rate": 0.000746, + "loss": 6.9277, "step": 376 }, { - "epoch": 0.32, - "learning_rate": 0.00074, - "loss": 6.9439, + "epoch": 0.63, + "learning_rate": 0.000748, + "loss": 6.956, "step": 377 }, { - "epoch": 0.32, - "learning_rate": 0.000742, - "loss": 7.0072, + "epoch": 0.64, + "learning_rate": 0.00075, + "loss": 7.0714, "step": 378 }, { - "epoch": 0.32, - "learning_rate": 0.000744, - "loss": 7.0979, + "epoch": 0.64, + "learning_rate": 0.0007520000000000001, + "loss": 6.9807, "step": 379 }, { - "epoch": 0.32, - "learning_rate": 0.000746, - "loss": 7.0279, + "epoch": 0.64, + "learning_rate": 0.000754, + "loss": 7.108, "step": 380 }, { - "epoch": 0.32, - "learning_rate": 0.000748, - "loss": 7.0169, + "epoch": 0.64, + "learning_rate": 0.000756, + "loss": 7.0734, "step": 381 }, { - "epoch": 0.32, - "learning_rate": 0.00075, - "loss": 7.0453, + "epoch": 0.64, + "learning_rate": 0.000758, + "loss": 7.0057, "step": 382 }, { - "epoch": 0.32, - "learning_rate": 0.0007520000000000001, - "loss": 6.8604, + "epoch": 0.64, + "learning_rate": 0.00076, + "loss": 6.9243, "step": 383 }, { - "epoch": 0.32, - "learning_rate": 0.000754, - "loss": 7.2136, + "epoch": 0.65, + "learning_rate": 0.000762, + "loss": 7.031, "step": 384 }, { - "epoch": 0.32, - "learning_rate": 0.000756, - "loss": 7.0006, + "epoch": 0.65, + "learning_rate": 0.000764, + "loss": 7.0799, "step": 385 }, { - "epoch": 0.32, - "learning_rate": 0.000758, - "loss": 6.8976, + "epoch": 0.65, + "learning_rate": 0.0007660000000000001, + "loss": 7.0769, "step": 386 }, { - "epoch": 0.33, - "learning_rate": 0.00076, - "loss": 6.9871, + "epoch": 0.65, + "learning_rate": 0.000768, + "loss": 7.1266, "step": 387 }, { - "epoch": 0.33, - "learning_rate": 0.000762, - "loss": 7.0362, + "epoch": 0.65, + "learning_rate": 0.0007700000000000001, + "loss": 7.0542, "step": 388 }, { - "epoch": 0.33, - "learning_rate": 0.000764, - "loss": 7.1718, + "epoch": 0.65, + "learning_rate": 0.000772, + "loss": 6.965, "step": 389 }, { - "epoch": 0.33, - "learning_rate": 0.0007660000000000001, - "loss": 7.3146, + "epoch": 0.66, + "learning_rate": 0.0007740000000000001, + "loss": 7.0061, "step": 390 }, { - "epoch": 0.33, - "learning_rate": 0.000768, - "loss": 7.1342, + "epoch": 0.66, + "learning_rate": 0.000776, + "loss": 7.089, "step": 391 }, { - "epoch": 0.33, - "learning_rate": 0.0007700000000000001, - "loss": 6.9868, + "epoch": 0.66, + "learning_rate": 0.000778, + "loss": 7.0489, "step": 392 }, { - "epoch": 0.33, - "learning_rate": 0.000772, - "loss": 6.8952, + "epoch": 0.66, + "learning_rate": 0.0007800000000000001, + "loss": 7.0362, "step": 393 }, { - "epoch": 0.33, - "learning_rate": 0.0007740000000000001, - "loss": 7.0074, + "epoch": 0.66, + "learning_rate": 0.000782, + "loss": 6.9978, "step": 394 }, { - "epoch": 0.33, - "learning_rate": 0.000776, - "loss": 6.8717, + "epoch": 0.66, + "learning_rate": 0.0007840000000000001, + "loss": 6.7938, "step": 395 }, { - "epoch": 0.33, - "learning_rate": 0.000778, - "loss": 6.9778, + "epoch": 0.67, + "learning_rate": 0.000786, + "loss": 6.7785, "step": 396 }, { - "epoch": 0.33, - "learning_rate": 0.0007800000000000001, - "loss": 6.7461, + "epoch": 0.67, + "learning_rate": 0.0007880000000000001, + "loss": 6.9116, "step": 397 }, { - "epoch": 0.33, - "learning_rate": 0.000782, - "loss": 6.7326, + "epoch": 0.67, + "learning_rate": 0.00079, + "loss": 7.0112, "step": 398 }, { - "epoch": 0.34, - "learning_rate": 0.0007840000000000001, - "loss": 6.6938, + "epoch": 0.67, + "learning_rate": 0.0007920000000000001, + "loss": 6.6276, "step": 399 }, { - "epoch": 0.34, - "learning_rate": 0.000786, - "loss": 6.4652, + "epoch": 0.67, + "learning_rate": 0.0007940000000000001, + "loss": 6.4829, "step": 400 }, { - "epoch": 0.34, - "learning_rate": 0.0007880000000000001, - "loss": 7.2434, + "epoch": 0.67, + "learning_rate": 0.000796, + "loss": 7.3835, "step": 401 }, { - "epoch": 0.34, - "learning_rate": 0.00079, - "loss": 7.1463, + "epoch": 0.68, + "learning_rate": 0.0007980000000000001, + "loss": 7.2009, "step": 402 }, { - "epoch": 0.34, - "learning_rate": 0.0007920000000000001, - "loss": 7.1114, + "epoch": 0.68, + "learning_rate": 0.0008, + "loss": 7.1478, "step": 403 }, { - "epoch": 0.34, - "learning_rate": 0.0007940000000000001, - "loss": 7.2073, + "epoch": 0.68, + "learning_rate": 0.0008020000000000001, + "loss": 7.112, "step": 404 }, { - "epoch": 0.34, - "learning_rate": 0.000796, - "loss": 7.0008, + "epoch": 0.68, + "learning_rate": 0.000804, + "loss": 7.0844, "step": 405 }, { - "epoch": 0.34, - "learning_rate": 0.0007980000000000001, - "loss": 7.0475, + "epoch": 0.68, + "learning_rate": 0.0008060000000000001, + "loss": 6.9843, "step": 406 }, { - "epoch": 0.34, - "learning_rate": 0.0008, - "loss": 7.0595, + "epoch": 0.68, + "learning_rate": 0.000808, + "loss": 6.9488, "step": 407 }, { - "epoch": 0.34, - "learning_rate": 0.0008020000000000001, - "loss": 7.1738, + "epoch": 0.69, + "learning_rate": 0.0008100000000000001, + "loss": 7.1111, "step": 408 }, { - "epoch": 0.34, - "learning_rate": 0.000804, - "loss": 8.4566, + "epoch": 0.69, + "learning_rate": 0.0008120000000000001, + "loss": 6.9722, "step": 409 }, { - "epoch": 0.34, - "learning_rate": 0.0008060000000000001, - "loss": 7.151, + "epoch": 0.69, + "learning_rate": 0.0008139999999999999, + "loss": 6.9885, "step": 410 }, { - "epoch": 0.35, - "learning_rate": 0.000808, - "loss": 7.1658, + "epoch": 0.69, + "learning_rate": 0.000816, + "loss": 7.0014, "step": 411 }, { - "epoch": 0.35, - "learning_rate": 0.0008100000000000001, - "loss": 7.1185, + "epoch": 0.69, + "learning_rate": 0.0008179999999999999, + "loss": 7.0313, "step": 412 }, { - "epoch": 0.35, - "learning_rate": 0.0008120000000000001, - "loss": 7.061, + "epoch": 0.69, + "learning_rate": 0.00082, + "loss": 6.8849, "step": 413 }, { - "epoch": 0.35, - "learning_rate": 0.0008139999999999999, - "loss": 6.9386, + "epoch": 0.7, + "learning_rate": 0.0008219999999999999, + "loss": 7.0015, "step": 414 }, { - "epoch": 0.35, - "learning_rate": 0.000816, - "loss": 7.0055, + "epoch": 0.7, + "learning_rate": 0.000824, + "loss": 7.1869, "step": 415 }, { - "epoch": 0.35, - "learning_rate": 0.0008179999999999999, - "loss": 7.2785, + "epoch": 0.7, + "learning_rate": 0.000826, + "loss": 6.9322, "step": 416 }, { - "epoch": 0.35, - "learning_rate": 0.00082, - "loss": 6.9835, + "epoch": 0.7, + "learning_rate": 0.000828, + "loss": 7.0145, "step": 417 }, { - "epoch": 0.35, - "learning_rate": 0.0008219999999999999, - "loss": 6.8751, + "epoch": 0.7, + "learning_rate": 0.00083, + "loss": 6.812, "step": 418 }, { - "epoch": 0.35, - "learning_rate": 0.000824, - "loss": 6.9674, + "epoch": 0.7, + "learning_rate": 0.000832, + "loss": 6.9888, "step": 419 }, { - "epoch": 0.35, - "learning_rate": 0.000826, - "loss": 7.1833, + "epoch": 0.71, + "learning_rate": 0.000834, + "loss": 7.0677, "step": 420 }, { - "epoch": 0.35, - "learning_rate": 0.000828, - "loss": 7.0289, + "epoch": 0.71, + "learning_rate": 0.0008359999999999999, + "loss": 6.9418, "step": 421 }, { - "epoch": 0.35, - "learning_rate": 0.00083, - "loss": 6.9144, + "epoch": 0.71, + "learning_rate": 0.000838, + "loss": 6.9906, "step": 422 }, { - "epoch": 0.36, - "learning_rate": 0.000832, - "loss": 7.0525, + "epoch": 0.71, + "learning_rate": 0.00084, + "loss": 7.0385, "step": 423 }, { - "epoch": 0.36, - "learning_rate": 0.000834, - "loss": 6.9794, + "epoch": 0.71, + "learning_rate": 0.000842, + "loss": 7.0314, "step": 424 }, { - "epoch": 0.36, - "learning_rate": 0.0008359999999999999, - "loss": 7.2218, + "epoch": 0.71, + "learning_rate": 0.000844, + "loss": 6.9856, "step": 425 }, { - "epoch": 0.36, - "learning_rate": 0.000838, - "loss": 7.031, + "epoch": 0.72, + "learning_rate": 0.000846, + "loss": 7.0841, "step": 426 }, { - "epoch": 0.36, - "learning_rate": 0.00084, - "loss": 7.0031, + "epoch": 0.72, + "learning_rate": 0.000848, + "loss": 6.9897, "step": 427 }, { - "epoch": 0.36, - "learning_rate": 0.000842, - "loss": 7.0494, + "epoch": 0.72, + "learning_rate": 0.00085, + "loss": 6.9838, "step": 428 }, { - "epoch": 0.36, - "learning_rate": 0.000844, - "loss": 7.1181, + "epoch": 0.72, + "learning_rate": 0.000852, + "loss": 7.1931, "step": 429 }, { - "epoch": 0.36, - "learning_rate": 0.000846, - "loss": 7.0683, + "epoch": 0.72, + "learning_rate": 0.000854, + "loss": 6.9795, "step": 430 }, { - "epoch": 0.36, - "learning_rate": 0.000848, - "loss": 7.1466, + "epoch": 0.72, + "learning_rate": 0.000856, + "loss": 6.9566, "step": 431 }, { - "epoch": 0.36, - "learning_rate": 0.00085, - "loss": 6.8249, + "epoch": 0.73, + "learning_rate": 0.000858, + "loss": 6.8935, "step": 432 }, { - "epoch": 0.36, - "learning_rate": 0.000852, - "loss": 7.0882, + "epoch": 0.73, + "learning_rate": 0.00086, + "loss": 7.0856, "step": 433 }, { - "epoch": 0.36, - "learning_rate": 0.000854, - "loss": 7.0593, + "epoch": 0.73, + "learning_rate": 0.000862, + "loss": 7.0538, "step": 434 }, { - "epoch": 0.37, - "learning_rate": 0.000856, - "loss": 7.088, + "epoch": 0.73, + "learning_rate": 0.000864, + "loss": 7.1257, "step": 435 }, { - "epoch": 0.37, - "learning_rate": 0.000858, - "loss": 6.8876, + "epoch": 0.73, + "learning_rate": 0.000866, + "loss": 6.9539, "step": 436 }, { - "epoch": 0.37, - "learning_rate": 0.00086, - "loss": 6.9609, + "epoch": 0.73, + "learning_rate": 0.0008680000000000001, + "loss": 6.9312, "step": 437 }, { - "epoch": 0.37, - "learning_rate": 0.000862, - "loss": 6.9152, + "epoch": 0.74, + "learning_rate": 0.00087, + "loss": 7.0382, "step": 438 }, { - "epoch": 0.37, - "learning_rate": 0.000864, - "loss": 7.2055, + "epoch": 0.74, + "learning_rate": 0.000872, + "loss": 7.0195, "step": 439 }, { - "epoch": 0.37, - "learning_rate": 0.000866, - "loss": 6.99, + "epoch": 0.74, + "learning_rate": 0.000874, + "loss": 7.1437, "step": 440 }, { - "epoch": 0.37, - "learning_rate": 0.0008680000000000001, - "loss": 7.0483, + "epoch": 0.74, + "learning_rate": 0.000876, + "loss": 6.9629, "step": 441 }, { - "epoch": 0.37, - "learning_rate": 0.00087, - "loss": 6.9639, + "epoch": 0.74, + "learning_rate": 0.000878, + "loss": 7.1108, "step": 442 }, { - "epoch": 0.37, - "learning_rate": 0.000872, - "loss": 7.0258, + "epoch": 0.74, + "learning_rate": 0.00088, + "loss": 6.9873, "step": 443 }, { - "epoch": 0.37, - "learning_rate": 0.000874, - "loss": 6.9092, + "epoch": 0.75, + "learning_rate": 0.000882, + "loss": 6.8853, "step": 444 }, { - "epoch": 0.37, - "learning_rate": 0.000876, - "loss": 6.7097, + "epoch": 0.75, + "learning_rate": 0.000884, + "loss": 6.8583, "step": 445 }, { - "epoch": 0.37, - "learning_rate": 0.000878, - "loss": 6.9751, + "epoch": 0.75, + "learning_rate": 0.0008860000000000001, + "loss": 7.0631, "step": 446 }, { - "epoch": 0.38, - "learning_rate": 0.00088, - "loss": 6.9166, + "epoch": 0.75, + "learning_rate": 0.000888, + "loss": 6.8263, "step": 447 }, { - "epoch": 0.38, - "learning_rate": 0.000882, - "loss": 6.9943, + "epoch": 0.75, + "learning_rate": 0.0008900000000000001, + "loss": 6.6845, "step": 448 }, { - "epoch": 0.38, - "learning_rate": 0.000884, - "loss": 6.7797, + "epoch": 0.75, + "learning_rate": 0.000892, + "loss": 6.8607, "step": 449 }, { - "epoch": 0.38, - "learning_rate": 0.0008860000000000001, - "loss": 6.5635, + "epoch": 0.76, + "learning_rate": 0.000894, + "loss": 6.5526, "step": 450 }, { - "epoch": 0.38, - "learning_rate": 0.000888, - "loss": 7.0689, + "epoch": 0.76, + "learning_rate": 0.000896, + "loss": 7.4182, "step": 451 }, { - "epoch": 0.38, - "learning_rate": 0.0008900000000000001, - "loss": 7.0351, + "epoch": 0.76, + "learning_rate": 0.000898, + "loss": 7.2457, "step": 452 }, { - "epoch": 0.38, - "learning_rate": 0.000892, - "loss": 31.0659, + "epoch": 0.76, + "learning_rate": 0.0009000000000000001, + "loss": 7.1327, "step": 453 }, { - "epoch": 0.38, - "learning_rate": 0.000894, - "loss": 7.03, + "epoch": 0.76, + "learning_rate": 0.000902, + "loss": 6.9056, "step": 454 }, { - "epoch": 0.38, - "learning_rate": 0.000896, - "loss": 7.0468, + "epoch": 0.77, + "learning_rate": 0.0009040000000000001, + "loss": 7.174, "step": 455 }, { - "epoch": 0.38, - "learning_rate": 0.000898, - "loss": 7.057, + "epoch": 0.77, + "learning_rate": 0.000906, + "loss": 7.0875, "step": 456 }, { - "epoch": 0.38, - "learning_rate": 0.0009000000000000001, - "loss": 7.0597, + "epoch": 0.77, + "learning_rate": 0.0009080000000000001, + "loss": 7.0482, "step": 457 }, { - "epoch": 0.39, - "learning_rate": 0.000902, - "loss": 7.1754, + "epoch": 0.77, + "learning_rate": 0.00091, + "loss": 7.2428, "step": 458 }, { - "epoch": 0.39, - "learning_rate": 0.0009040000000000001, - "loss": 7.1328, + "epoch": 0.77, + "learning_rate": 0.000912, + "loss": 6.9443, "step": 459 }, { - "epoch": 0.39, - "learning_rate": 0.000906, - "loss": 7.1022, + "epoch": 0.77, + "learning_rate": 0.0009140000000000001, + "loss": 6.978, "step": 460 }, { - "epoch": 0.39, - "learning_rate": 0.0009080000000000001, - "loss": 7.0483, + "epoch": 0.78, + "learning_rate": 0.000916, + "loss": 7.0511, "step": 461 }, { - "epoch": 0.39, - "learning_rate": 0.00091, - "loss": 10.5709, + "epoch": 0.78, + "learning_rate": 0.0009180000000000001, + "loss": 6.9723, "step": 462 }, { - "epoch": 0.39, - "learning_rate": 0.000912, - "loss": 7.0752, + "epoch": 0.78, + "learning_rate": 0.00092, + "loss": 6.9377, "step": 463 }, { - "epoch": 0.39, - "learning_rate": 0.0009140000000000001, - "loss": 7.1188, + "epoch": 0.78, + "learning_rate": 0.0009220000000000001, + "loss": 6.9787, "step": 464 }, { - "epoch": 0.39, - "learning_rate": 0.000916, - "loss": 7.0777, + "epoch": 0.78, + "learning_rate": 0.000924, + "loss": 7.0386, "step": 465 }, { - "epoch": 0.39, - "learning_rate": 0.0009180000000000001, - "loss": 7.0153, + "epoch": 0.78, + "learning_rate": 0.0009260000000000001, + "loss": 7.041, "step": 466 }, { - "epoch": 0.39, - "learning_rate": 0.00092, - "loss": 6.8818, - "step": 467 + "epoch": 0.79, + "learning_rate": 0.0009280000000000001, + "loss": 7.1201, + "step": 467 }, { - "epoch": 0.39, - "learning_rate": 0.0009220000000000001, - "loss": 6.9739, + "epoch": 0.79, + "learning_rate": 0.00093, + "loss": 6.9869, "step": 468 }, { - "epoch": 0.39, - "learning_rate": 0.000924, - "loss": 6.8447, + "epoch": 0.79, + "learning_rate": 0.0009320000000000001, + "loss": 6.9156, "step": 469 }, { - "epoch": 0.4, - "learning_rate": 0.0009260000000000001, - "loss": 7.2424, + "epoch": 0.79, + "learning_rate": 0.000934, + "loss": 6.9832, "step": 470 }, { - "epoch": 0.4, - "learning_rate": 0.0009280000000000001, - "loss": 7.0031, + "epoch": 0.79, + "learning_rate": 0.0009360000000000001, + "loss": 6.9666, "step": 471 }, { - "epoch": 0.4, - "learning_rate": 0.00093, - "loss": 7.1577, + "epoch": 0.79, + "learning_rate": 0.0009379999999999999, + "loss": 7.0176, "step": 472 }, { - "epoch": 0.4, - "learning_rate": 0.0009320000000000001, - "loss": 7.0784, + "epoch": 0.8, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 473 }, { - "epoch": 0.4, - "learning_rate": 0.000934, - "loss": 7.1943, + "epoch": 0.8, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 474 }, { - "epoch": 0.4, - "learning_rate": 0.0009360000000000001, - "loss": 6.9866, + "epoch": 0.8, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 475 }, { - "epoch": 0.4, + "epoch": 0.8, "learning_rate": 0.0009379999999999999, - "loss": 6.8905, + "loss": 0.0, "step": 476 }, { - "epoch": 0.4, - "learning_rate": 0.00094, - "loss": 6.8424, + "epoch": 0.8, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 477 }, { - "epoch": 0.4, - "learning_rate": 0.000942, - "loss": 7.0373, + "epoch": 0.8, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 478 }, { - "epoch": 0.4, - "learning_rate": 0.000944, - "loss": 6.9945, + "epoch": 0.81, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 479 }, { - "epoch": 0.4, - "learning_rate": 0.000946, - "loss": 7.1459, + "epoch": 0.81, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 480 }, { - "epoch": 0.4, - "learning_rate": 0.000948, - "loss": 6.942, + "epoch": 0.81, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 481 }, { - "epoch": 0.41, - "learning_rate": 0.00095, - "loss": 6.9965, + "epoch": 0.81, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 482 }, { - "epoch": 0.41, - "learning_rate": 0.0009519999999999999, - "loss": 7.0274, + "epoch": 0.81, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 483 }, { - "epoch": 0.41, - "learning_rate": 0.000954, - "loss": 7.0962, + "epoch": 0.81, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 484 }, { - "epoch": 0.41, - "learning_rate": 0.0009559999999999999, - "loss": 7.0712, + "epoch": 0.82, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 485 }, { - "epoch": 0.41, - "learning_rate": 0.000958, - "loss": 6.9201, + "epoch": 0.82, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 486 }, { - "epoch": 0.41, - "learning_rate": 0.00096, - "loss": 6.9673, + "epoch": 0.82, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 487 }, { - "epoch": 0.41, - "learning_rate": 0.000962, - "loss": 7.0785, + "epoch": 0.82, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 488 }, { - "epoch": 0.41, - "learning_rate": 0.000964, - "loss": 7.0721, + "epoch": 0.82, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 489 }, { - "epoch": 0.41, - "learning_rate": 0.000966, - "loss": 6.9475, + "epoch": 0.82, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 490 }, { - "epoch": 0.41, - "learning_rate": 0.000968, - "loss": 7.1285, + "epoch": 0.83, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 491 }, { - "epoch": 0.41, - "learning_rate": 0.0009699999999999999, - "loss": 7.1993, + "epoch": 0.83, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 492 }, { - "epoch": 0.41, - "learning_rate": 0.000972, - "loss": 7.1318, + "epoch": 0.83, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 493 }, { - "epoch": 0.42, - "learning_rate": 0.000974, - "loss": 7.1, + "epoch": 0.83, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 494 }, { - "epoch": 0.42, - "learning_rate": 0.000976, - "loss": 6.9925, + "epoch": 0.83, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 495 }, { - "epoch": 0.42, - "learning_rate": 0.000978, - "loss": 6.9424, + "epoch": 0.83, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 496 }, { - "epoch": 0.42, - "learning_rate": 0.00098, - "loss": 6.9446, + "epoch": 0.84, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 497 }, { - "epoch": 0.42, - "learning_rate": 0.000982, - "loss": 6.8137, + "epoch": 0.84, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 498 }, { - "epoch": 0.42, - "learning_rate": 0.000984, - "loss": 6.7793, + "epoch": 0.84, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 499 }, { - "epoch": 0.42, - "learning_rate": 0.0009860000000000001, - "loss": 6.6679, + "epoch": 0.84, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 500 }, { - "epoch": 0.42, - "eval_loss": 7.067996978759766, - "eval_runtime": 346.2794, - "eval_samples_per_second": 7.63, - "eval_steps_per_second": 0.638, - "eval_wer": 1.0, + "epoch": 0.84, + "eval_loss": NaN, + "eval_runtime": 772.1245, + "eval_samples_per_second": 3.422, + "eval_steps_per_second": 0.286, + "eval_wer": 2.0097029585094353, "step": 500 }, { - "epoch": 0.42, - "learning_rate": 0.000988, - "loss": 7.1796, + "epoch": 0.84, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 501 }, { - "epoch": 0.42, - "learning_rate": 0.00099, - "loss": 23.0601, + "epoch": 0.84, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 502 }, { - "epoch": 0.42, - "learning_rate": 0.000992, - "loss": 7.1719, + "epoch": 0.85, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 503 }, { - "epoch": 0.42, - "learning_rate": 0.000994, - "loss": 7.1177, + "epoch": 0.85, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 504 }, { - "epoch": 0.42, - "learning_rate": 0.000996, - "loss": 7.0817, + "epoch": 0.85, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 505 }, { - "epoch": 0.43, - "learning_rate": 0.000998, - "loss": 7.2064, + "epoch": 0.85, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 506 }, { - "epoch": 0.43, - "learning_rate": 0.001, - "loss": 7.048, + "epoch": 0.85, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 507 }, { - "epoch": 0.43, - "learning_rate": 0.0009985486211901307, - "loss": 7.0498, + "epoch": 0.85, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 508 }, { - "epoch": 0.43, - "learning_rate": 0.0009970972423802612, - "loss": 7.019, + "epoch": 0.86, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 509 }, { - "epoch": 0.43, - "learning_rate": 0.000995645863570392, - "loss": 7.0718, + "epoch": 0.86, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 510 }, { - "epoch": 0.43, - "learning_rate": 0.0009941944847605226, - "loss": 6.8736, + "epoch": 0.86, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 511 }, { - "epoch": 0.43, - "learning_rate": 0.0009927431059506531, - "loss": 7.0358, + "epoch": 0.86, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 512 }, { - "epoch": 0.43, - "learning_rate": 0.0009912917271407838, - "loss": 7.0267, + "epoch": 0.86, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 513 }, { - "epoch": 0.43, - "learning_rate": 0.0009898403483309143, - "loss": 6.9751, + "epoch": 0.86, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 514 }, { - "epoch": 0.43, - "learning_rate": 0.000988388969521045, - "loss": 7.0864, + "epoch": 0.87, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 515 }, { - "epoch": 0.43, - "learning_rate": 0.0009869375907111755, - "loss": 6.9706, + "epoch": 0.87, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 516 }, { - "epoch": 0.43, - "learning_rate": 0.0009854862119013062, - "loss": 7.0958, + "epoch": 0.87, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 517 }, { - "epoch": 0.44, - "learning_rate": 0.000984034833091437, - "loss": 6.9314, + "epoch": 0.87, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 518 }, { - "epoch": 0.44, - "learning_rate": 0.0009825834542815674, - "loss": 7.2066, + "epoch": 0.87, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 519 }, { - "epoch": 0.44, - "learning_rate": 0.0009811320754716981, - "loss": 6.9461, + "epoch": 0.87, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 520 }, { - "epoch": 0.44, - "learning_rate": 0.0009796806966618288, - "loss": 7.0822, + "epoch": 0.88, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 521 }, { - "epoch": 0.44, - "learning_rate": 0.0009782293178519593, - "loss": 7.0513, + "epoch": 0.88, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 522 }, { - "epoch": 0.44, - "learning_rate": 0.00097677793904209, - "loss": 7.0695, + "epoch": 0.88, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 523 }, { - "epoch": 0.44, - "learning_rate": 0.0009753265602322206, - "loss": 6.9845, + "epoch": 0.88, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 524 }, { - "epoch": 0.44, - "learning_rate": 0.0009738751814223512, - "loss": 6.9712, + "epoch": 0.88, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 525 }, { - "epoch": 0.44, - "learning_rate": 0.0009724238026124818, - "loss": 6.8825, + "epoch": 0.88, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 526 }, { - "epoch": 0.44, - "learning_rate": 0.0009709724238026125, - "loss": 7.0213, + "epoch": 0.89, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 527 }, { - "epoch": 0.44, - "learning_rate": 0.000969521044992743, - "loss": 7.0573, + "epoch": 0.89, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 528 }, { - "epoch": 0.44, - "learning_rate": 0.0009680696661828737, - "loss": 7.0032, + "epoch": 0.89, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 529 }, { - "epoch": 0.45, - "learning_rate": 0.0009666182873730044, - "loss": 7.0469, + "epoch": 0.89, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 530 }, { - "epoch": 0.45, - "learning_rate": 0.000965166908563135, - "loss": 7.011, + "epoch": 0.89, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 531 }, { - "epoch": 0.45, - "learning_rate": 0.0009637155297532656, - "loss": 7.0697, + "epoch": 0.89, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 532 }, { - "epoch": 0.45, - "learning_rate": 0.0009622641509433962, - "loss": 6.8978, + "epoch": 0.9, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 533 }, { - "epoch": 0.45, - "learning_rate": 0.0009608127721335269, - "loss": 6.994, + "epoch": 0.9, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 534 }, { - "epoch": 0.45, - "learning_rate": 0.0009593613933236574, - "loss": 7.099, + "epoch": 0.9, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 535 }, { - "epoch": 0.45, - "learning_rate": 0.0009579100145137881, - "loss": 6.9542, + "epoch": 0.9, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 536 }, { - "epoch": 0.45, - "learning_rate": 0.0009564586357039187, - "loss": 6.9809, + "epoch": 0.9, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 537 }, { - "epoch": 0.45, - "learning_rate": 0.0009550072568940493, - "loss": 6.8488, + "epoch": 0.9, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 538 }, { - "epoch": 0.45, - "learning_rate": 0.0009535558780841799, - "loss": 6.9842, + "epoch": 0.91, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 539 }, { - "epoch": 0.45, - "learning_rate": 0.0009521044992743106, - "loss": 7.1183, + "epoch": 0.91, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 540 }, { - "epoch": 0.45, - "learning_rate": 0.0009506531204644411, - "loss": 7.1515, + "epoch": 0.91, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 541 }, { - "epoch": 0.46, - "learning_rate": 0.0009492017416545718, - "loss": 7.0061, + "epoch": 0.91, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 542 }, { - "epoch": 0.46, - "learning_rate": 0.0009477503628447026, - "loss": 6.8293, + "epoch": 0.91, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 543 }, { - "epoch": 0.46, - "learning_rate": 0.0009462989840348331, - "loss": 7.1905, + "epoch": 0.91, + "learning_rate": 0.0009379999999999999, + "loss": 0.0, "step": 544 }, - { - "epoch": 0.46, - "learning_rate": 0.0009448476052249637, - "loss": 7.0211, - "step": 545 - }, - { - "epoch": 0.46, - "learning_rate": 0.0009433962264150943, - "loss": 6.9183, - "step": 546 - }, - { - "epoch": 0.46, - "learning_rate": 0.000941944847605225, - "loss": 6.6965, - "step": 547 - }, - { - "epoch": 0.46, - "learning_rate": 0.0009404934687953555, - "loss": 6.9138, - "step": 548 - }, - { - "epoch": 0.46, - "learning_rate": 0.0009390420899854863, - "loss": 6.7907, - "step": 549 - }, - { - "epoch": 0.46, - "learning_rate": 0.0009375907111756169, - "loss": 6.7276, - "step": 550 - }, - { - "epoch": 0.46, - "learning_rate": 0.0009361393323657474, - "loss": 7.237, - "step": 551 - }, - { - "epoch": 0.46, - "learning_rate": 0.000934687953555878, - "loss": 7.0571, - "step": 552 - }, - { - "epoch": 0.46, - "learning_rate": 0.0009332365747460088, - "loss": 7.1393, - "step": 553 - }, - { - "epoch": 0.47, - "learning_rate": 0.0009317851959361394, - "loss": 7.0599, - "step": 554 - }, - { - "epoch": 0.47, - "learning_rate": 0.00093033381712627, - "loss": 7.169, - "step": 555 - }, - { - "epoch": 0.47, - "learning_rate": 0.0009288824383164007, - "loss": 7.0099, - "step": 556 - }, - { - "epoch": 0.47, - "learning_rate": 0.0009274310595065311, - "loss": 7.0857, - "step": 557 - }, - { - "epoch": 0.47, - "learning_rate": 0.0009259796806966619, - "loss": 6.9762, - "step": 558 - }, - { - "epoch": 0.47, - "learning_rate": 0.0009245283018867925, - "loss": 7.2202, - "step": 559 - }, - { - "epoch": 0.47, - "learning_rate": 0.0009230769230769232, - "loss": 6.9869, - "step": 560 - }, - { - "epoch": 0.47, - "learning_rate": 0.0009216255442670537, - "loss": 7.2204, - "step": 561 - }, - { - "epoch": 0.47, - "learning_rate": 0.0009201741654571844, - "loss": 6.9178, - "step": 562 - }, - { - "epoch": 0.47, - "learning_rate": 0.000918722786647315, - "loss": 7.1024, - "step": 563 - }, - { - "epoch": 0.47, - "learning_rate": 0.0009172714078374456, - "loss": 6.7413, - "step": 564 - }, - { - "epoch": 0.47, - "learning_rate": 0.0009158200290275763, - "loss": 6.9166, - "step": 565 - }, - { - "epoch": 0.48, - "learning_rate": 0.0009143686502177069, - "loss": 7.025, - "step": 566 - }, - { - "epoch": 0.48, - "learning_rate": 0.0009129172714078375, - "loss": 6.9701, - "step": 567 - }, - { - "epoch": 0.48, - "learning_rate": 0.0009114658925979681, - "loss": 6.9994, - "step": 568 - }, - { - "epoch": 0.48, - "learning_rate": 0.0009100145137880988, - "loss": 7.0043, - "step": 569 - }, - { - "epoch": 0.48, - "learning_rate": 0.0009085631349782293, - "loss": 7.0049, - "step": 570 - }, - { - "epoch": 0.48, - "learning_rate": 0.00090711175616836, - "loss": 7.0456, - "step": 571 - }, - { - "epoch": 0.48, - "learning_rate": 0.0009056603773584906, - "loss": 7.0374, - "step": 572 - }, - { - "epoch": 0.48, - "learning_rate": 0.0009042089985486213, - "loss": 6.9867, - "step": 573 - }, - { - "epoch": 0.48, - "learning_rate": 0.0009027576197387518, - "loss": 7.0539, - "step": 574 - }, - { - "epoch": 0.48, - "learning_rate": 0.0009013062409288825, - "loss": 7.231, - "step": 575 - }, - { - "epoch": 0.48, - "learning_rate": 0.0008998548621190132, - "loss": 7.1384, - "step": 576 - }, - { - "epoch": 0.49, - "learning_rate": 0.0008984034833091437, - "loss": 7.1052, - "step": 577 - }, - { - "epoch": 0.49, - "learning_rate": 0.0008969521044992744, - "loss": 7.0373, - "step": 578 - }, - { - "epoch": 0.49, - "learning_rate": 0.000895500725689405, - "loss": 6.9778, - "step": 579 - }, - { - "epoch": 0.49, - "learning_rate": 0.0008940493468795356, - "loss": 7.1991, - "step": 580 - }, - { - "epoch": 0.49, - "learning_rate": 0.0008925979680696662, - "loss": 7.1533, - "step": 581 - }, - { - "epoch": 0.49, - "learning_rate": 0.0008911465892597969, - "loss": 6.951, - "step": 582 - }, - { - "epoch": 0.49, - "learning_rate": 0.0008896952104499274, - "loss": 7.0072, - "step": 583 - }, - { - "epoch": 0.49, - "learning_rate": 0.0008882438316400581, - "loss": 6.914, - "step": 584 - }, - { - "epoch": 0.49, - "learning_rate": 0.0008867924528301887, - "loss": 7.1663, - "step": 585 - }, - { - "epoch": 0.49, - "learning_rate": 0.0008853410740203193, - "loss": 7.01, - "step": 586 - }, - { - "epoch": 0.49, - "learning_rate": 0.00088388969521045, - "loss": 7.1636, - "step": 587 - }, - { - "epoch": 0.49, - "learning_rate": 0.0008824383164005806, - "loss": 6.9596, - "step": 588 - }, - { - "epoch": 0.5, - "learning_rate": 0.0008809869375907113, - "loss": 6.929, - "step": 589 - }, - { - "epoch": 0.5, - "learning_rate": 0.0008795355587808418, - "loss": 7.0193, - "step": 590 - }, - { - "epoch": 0.5, - "learning_rate": 0.0008780841799709725, - "loss": 7.077, - "step": 591 - }, - { - "epoch": 0.5, - "learning_rate": 0.0008766328011611031, - "loss": 6.9883, - "step": 592 - }, - { - "epoch": 0.5, - "learning_rate": 0.0008751814223512337, - "loss": 7.0503, - "step": 593 - }, - { - "epoch": 0.5, - "learning_rate": 0.0008737300435413643, - "loss": 6.8605, - "step": 594 - }, - { - "epoch": 0.5, - "learning_rate": 0.000872278664731495, - "loss": 6.769, - "step": 595 - }, - { - "epoch": 0.5, - "learning_rate": 0.0008708272859216255, - "loss": 6.8794, - "step": 596 - }, - { - "epoch": 0.5, - "learning_rate": 0.0008693759071117562, - "loss": 6.7565, - "step": 597 - }, - { - "epoch": 0.5, - "learning_rate": 0.0008679245283018869, - "loss": 6.8621, - "step": 598 - }, - { - "epoch": 0.5, - "learning_rate": 0.0008664731494920174, - "loss": 6.6671, - "step": 599 - }, - { - "epoch": 0.5, - "learning_rate": 0.0008650217706821481, - "loss": 6.5365, - "step": 600 - }, - { - "epoch": 0.51, - "learning_rate": 0.0008635703918722787, - "loss": 7.0766, - "step": 601 - }, - { - "epoch": 0.51, - "learning_rate": 0.0008621190130624093, - "loss": 7.2011, - "step": 602 - }, - { - "epoch": 0.51, - "learning_rate": 0.0008606676342525399, - "loss": 7.2054, - "step": 603 - }, - { - "epoch": 0.51, - "learning_rate": 0.0008592162554426706, - "loss": 7.0521, - "step": 604 - }, - { - "epoch": 0.51, - "learning_rate": 0.0008577648766328012, - "loss": 7.1367, - "step": 605 - }, - { - "epoch": 0.51, - "learning_rate": 0.0008563134978229318, - "loss": 7.21, - "step": 606 - }, - { - "epoch": 0.51, - "learning_rate": 0.0008548621190130624, - "loss": 6.9841, - "step": 607 - }, - { - "epoch": 0.51, - "learning_rate": 0.0008534107402031931, - "loss": 7.1412, - "step": 608 - }, - { - "epoch": 0.51, - "learning_rate": 0.0008519593613933237, - "loss": 7.0282, - "step": 609 - }, - { - "epoch": 0.51, - "learning_rate": 0.0008505079825834543, - "loss": 7.0733, - "step": 610 - }, - { - "epoch": 0.51, - "learning_rate": 0.000849056603773585, - "loss": 7.1754, - "step": 611 - }, - { - "epoch": 0.51, - "learning_rate": 0.0008476052249637155, - "loss": 6.9809, - "step": 612 - }, - { - "epoch": 0.52, - "learning_rate": 0.0008461538461538462, - "loss": 6.9781, - "step": 613 - }, - { - "epoch": 0.52, - "learning_rate": 0.0008447024673439768, - "loss": 6.9147, - "step": 614 - }, - { - "epoch": 0.52, - "learning_rate": 0.0008432510885341074, - "loss": 7.0614, - "step": 615 - }, - { - "epoch": 0.52, - "learning_rate": 0.000841799709724238, - "loss": 6.9196, - "step": 616 - }, - { - "epoch": 0.52, - "learning_rate": 0.0008403483309143687, - "loss": 6.9836, - "step": 617 - }, - { - "epoch": 0.52, - "learning_rate": 0.0008388969521044993, - "loss": 6.94, - "step": 618 - }, - { - "epoch": 0.52, - "learning_rate": 0.0008374455732946299, - "loss": 7.0487, - "step": 619 - }, - { - "epoch": 0.52, - "learning_rate": 0.0008359941944847606, - "loss": 7.1344, - "step": 620 - }, - { - "epoch": 0.52, - "learning_rate": 0.0008345428156748912, - "loss": 6.8899, - "step": 621 - }, - { - "epoch": 0.52, - "learning_rate": 0.0008330914368650218, - "loss": 6.9095, - "step": 622 - }, - { - "epoch": 0.52, - "learning_rate": 0.0008316400580551524, - "loss": 6.9635, - "step": 623 - }, - { - "epoch": 0.52, - "learning_rate": 0.0008301886792452831, - "loss": 7.0661, - "step": 624 - }, - { - "epoch": 0.53, - "learning_rate": 0.0008287373004354136, - "loss": 7.1297, - "step": 625 - }, - { - "epoch": 0.53, - "learning_rate": 0.0008272859216255443, - "loss": 7.1099, - "step": 626 - }, - { - "epoch": 0.53, - "learning_rate": 0.0008258345428156749, - "loss": 7.1122, - "step": 627 - }, - { - "epoch": 0.53, - "learning_rate": 0.0008243831640058055, - "loss": 6.9014, - "step": 628 - }, - { - "epoch": 0.53, - "learning_rate": 0.0008229317851959361, - "loss": 6.8948, - "step": 629 - }, - { - "epoch": 0.53, - "learning_rate": 0.0008214804063860668, - "loss": 6.9861, - "step": 630 - }, - { - "epoch": 0.53, - "learning_rate": 0.0008200290275761973, - "loss": 7.0249, - "step": 631 - }, - { - "epoch": 0.53, - "learning_rate": 0.000818577648766328, - "loss": 7.05, - "step": 632 - }, - { - "epoch": 0.53, - "learning_rate": 0.0008171262699564587, - "loss": 6.9883, - "step": 633 - }, - { - "epoch": 0.53, - "learning_rate": 0.0008156748911465893, - "loss": 7.1293, - "step": 634 - }, - { - "epoch": 0.53, - "learning_rate": 0.0008142235123367199, - "loss": 7.0649, - "step": 635 - }, - { - "epoch": 0.53, - "learning_rate": 0.0008127721335268505, - "loss": 7.0533, - "step": 636 - }, - { - "epoch": 0.54, - "learning_rate": 0.0008113207547169812, - "loss": 6.9667, - "step": 637 - }, - { - "epoch": 0.54, - "learning_rate": 0.0008098693759071117, - "loss": 6.9186, - "step": 638 - }, - { - "epoch": 0.54, - "learning_rate": 0.0008084179970972424, - "loss": 6.9798, - "step": 639 - }, - { - "epoch": 0.54, - "learning_rate": 0.000806966618287373, - "loss": 7.2642, - "step": 640 - }, - { - "epoch": 0.54, - "learning_rate": 0.0008055152394775036, - "loss": 6.9569, - "step": 641 - }, - { - "epoch": 0.54, - "learning_rate": 0.0008040638606676342, - "loss": 6.9103, - "step": 642 - }, - { - "epoch": 0.54, - "learning_rate": 0.0008026124818577649, - "loss": 7.0987, - "step": 643 - }, - { - "epoch": 0.54, - "learning_rate": 0.0008011611030478955, - "loss": 7.0009, - "step": 644 - }, - { - "epoch": 0.54, - "learning_rate": 0.0007997097242380261, - "loss": 6.9996, - "step": 645 - }, - { - "epoch": 0.54, - "learning_rate": 0.0007982583454281568, - "loss": 6.8922, - "step": 646 - }, - { - "epoch": 0.54, - "learning_rate": 0.0007968069666182874, - "loss": 6.584, - "step": 647 - }, - { - "epoch": 0.54, - "learning_rate": 0.000795355587808418, - "loss": 7.1011, - "step": 648 - }, - { - "epoch": 0.55, - "learning_rate": 0.0007939042089985486, - "loss": 6.9417, - "step": 649 - }, - { - "epoch": 0.55, - "learning_rate": 0.0007924528301886793, - "loss": 6.5543, - "step": 650 - }, - { - "epoch": 0.55, - "learning_rate": 0.0007910014513788098, - "loss": 7.2658, - "step": 651 - }, - { - "epoch": 0.55, - "learning_rate": 0.0007895500725689405, - "loss": 7.0368, - "step": 652 - }, - { - "epoch": 0.55, - "learning_rate": 0.0007880986937590711, - "loss": 7.1025, - "step": 653 - }, - { - "epoch": 0.55, - "learning_rate": 0.0007866473149492017, - "loss": 7.1689, - "step": 654 - }, - { - "epoch": 0.55, - "learning_rate": 0.0007851959361393324, - "loss": 7.1003, - "step": 655 - }, - { - "epoch": 0.55, - "learning_rate": 0.000783744557329463, - "loss": 7.0462, - "step": 656 - }, - { - "epoch": 0.55, - "learning_rate": 0.0007822931785195936, - "loss": 6.9565, - "step": 657 - }, - { - "epoch": 0.55, - "learning_rate": 0.0007808417997097242, - "loss": 7.1247, - "step": 658 - }, - { - "epoch": 0.55, - "learning_rate": 0.0007793904208998549, - "loss": 6.9882, - "step": 659 - }, - { - "epoch": 0.55, - "learning_rate": 0.0007779390420899854, - "loss": 7.0008, - "step": 660 - }, - { - "epoch": 0.56, - "learning_rate": 0.0007764876632801161, - "loss": 6.9634, - "step": 661 - }, - { - "epoch": 0.56, - "learning_rate": 0.0007750362844702467, - "loss": 7.0588, - "step": 662 - }, - { - "epoch": 0.56, - "learning_rate": 0.0007735849056603774, - "loss": 6.9029, - "step": 663 - }, - { - "epoch": 0.56, - "learning_rate": 0.0007721335268505079, - "loss": 7.0907, - "step": 664 - }, - { - "epoch": 0.56, - "learning_rate": 0.0007706821480406386, - "loss": 7.0847, - "step": 665 - }, - { - "epoch": 0.56, - "learning_rate": 0.0007692307692307693, - "loss": 6.8848, - "step": 666 - }, - { - "epoch": 0.56, - "learning_rate": 0.0007677793904208998, - "loss": 7.0483, - "step": 667 - }, - { - "epoch": 0.56, - "learning_rate": 0.0007663280116110305, - "loss": 6.979, - "step": 668 - }, - { - "epoch": 0.56, - "learning_rate": 0.0007648766328011611, - "loss": 7.2513, - "step": 669 - }, - { - "epoch": 0.56, - "learning_rate": 0.0007634252539912917, - "loss": 7.1019, - "step": 670 - }, - { - "epoch": 0.56, - "learning_rate": 0.0007619738751814223, - "loss": 7.0031, - "step": 671 - }, - { - "epoch": 0.56, - "learning_rate": 0.000760522496371553, - "loss": 7.0034, - "step": 672 - }, - { - "epoch": 0.57, - "learning_rate": 0.0007590711175616835, - "loss": 6.9702, - "step": 673 - }, - { - "epoch": 0.57, - "learning_rate": 0.0007576197387518142, - "loss": 6.8878, - "step": 674 - }, - { - "epoch": 0.57, - "learning_rate": 0.0007561683599419448, - "loss": 7.1037, - "step": 675 - }, - { - "epoch": 0.57, - "learning_rate": 0.0007547169811320755, - "loss": 7.0139, - "step": 676 - }, - { - "epoch": 0.57, - "learning_rate": 0.0007532656023222061, - "loss": 7.1821, - "step": 677 - }, - { - "epoch": 0.57, - "learning_rate": 0.0007518142235123367, - "loss": 7.1849, - "step": 678 - }, - { - "epoch": 0.57, - "learning_rate": 0.0007503628447024674, - "loss": 7.0322, - "step": 679 - }, - { - "epoch": 0.57, - "learning_rate": 0.0007489114658925979, - "loss": 7.1286, - "step": 680 - }, - { - "epoch": 0.57, - "learning_rate": 0.0007474600870827286, - "loss": 6.9087, - "step": 681 - }, - { - "epoch": 0.57, - "learning_rate": 0.0007460087082728592, - "loss": 6.9388, - "step": 682 - }, - { - "epoch": 0.57, - "learning_rate": 0.0007445573294629898, - "loss": 6.9481, - "step": 683 - }, - { - "epoch": 0.58, - "learning_rate": 0.0007431059506531204, - "loss": 6.9418, - "step": 684 - }, - { - "epoch": 0.58, - "learning_rate": 0.0007416545718432511, - "loss": 6.9699, - "step": 685 - }, - { - "epoch": 0.58, - "learning_rate": 0.0007402031930333816, - "loss": 6.9794, - "step": 686 - }, - { - "epoch": 0.58, - "learning_rate": 0.0007387518142235123, - "loss": 7.0891, - "step": 687 - }, - { - "epoch": 0.58, - "learning_rate": 0.000737300435413643, - "loss": 7.0207, - "step": 688 - }, - { - "epoch": 0.58, - "learning_rate": 0.0007358490566037735, - "loss": 7.1929, - "step": 689 - }, - { - "epoch": 0.58, - "learning_rate": 0.0007343976777939043, - "loss": 6.9594, - "step": 690 - }, - { - "epoch": 0.58, - "learning_rate": 0.0007329462989840348, - "loss": 7.2063, - "step": 691 - }, - { - "epoch": 0.58, - "learning_rate": 0.0007314949201741656, - "loss": 6.9044, - "step": 692 - }, - { - "epoch": 0.58, - "learning_rate": 0.000730043541364296, - "loss": 6.9193, - "step": 693 - }, - { - "epoch": 0.58, - "learning_rate": 0.0007285921625544268, - "loss": 7.0464, - "step": 694 - }, - { - "epoch": 0.58, - "learning_rate": 0.0007271407837445574, - "loss": 6.964, - "step": 695 - }, - { - "epoch": 0.59, - "learning_rate": 0.000725689404934688, - "loss": 6.6928, - "step": 696 - }, - { - "epoch": 0.59, - "learning_rate": 0.0007242380261248185, - "loss": 7.1109, - "step": 697 - }, - { - "epoch": 0.59, - "learning_rate": 0.0007227866473149493, - "loss": 7.0787, - "step": 698 - }, - { - "epoch": 0.59, - "learning_rate": 0.0007213352685050797, - "loss": 6.5761, - "step": 699 - }, - { - "epoch": 0.59, - "learning_rate": 0.0007198838896952105, - "loss": 6.9642, - "step": 700 - }, - { - "epoch": 0.59, - "learning_rate": 0.0007184325108853412, - "loss": 7.2433, - "step": 701 - }, - { - "epoch": 0.59, - "learning_rate": 0.0007169811320754717, - "loss": 6.9795, - "step": 702 - }, - { - "epoch": 0.59, - "learning_rate": 0.0007155297532656024, - "loss": 7.1225, - "step": 703 - }, - { - "epoch": 0.59, - "learning_rate": 0.000714078374455733, - "loss": 7.0223, - "step": 704 - }, - { - "epoch": 0.59, - "learning_rate": 0.0007126269956458637, - "loss": 7.0501, - "step": 705 - }, - { - "epoch": 0.59, - "learning_rate": 0.0007111756168359942, - "loss": 7.1019, - "step": 706 - }, - { - "epoch": 0.59, - "learning_rate": 0.0007097242380261249, - "loss": 7.056, - "step": 707 - }, - { - "epoch": 0.6, - "learning_rate": 0.0007082728592162555, - "loss": 6.9328, - "step": 708 - }, - { - "epoch": 0.6, - "learning_rate": 0.0007068214804063861, - "loss": 7.0541, - "step": 709 - }, - { - "epoch": 0.6, - "learning_rate": 0.0007053701015965167, - "loss": 7.0901, - "step": 710 - }, - { - "epoch": 0.6, - "learning_rate": 0.0007039187227866474, - "loss": 6.8945, - "step": 711 - }, - { - "epoch": 0.6, - "learning_rate": 0.000702467343976778, - "loss": 7.2194, - "step": 712 - }, - { - "epoch": 0.6, - "learning_rate": 0.0007010159651669086, - "loss": 7.039, - "step": 713 - }, - { - "epoch": 0.6, - "learning_rate": 0.0006995645863570393, - "loss": 7.051, - "step": 714 - }, - { - "epoch": 0.6, - "learning_rate": 0.0006981132075471698, - "loss": 6.9684, - "step": 715 - }, - { - "epoch": 0.6, - "learning_rate": 0.0006966618287373005, - "loss": 6.9407, - "step": 716 - }, - { - "epoch": 0.6, - "learning_rate": 0.0006952104499274311, - "loss": 7.0311, - "step": 717 - }, - { - "epoch": 0.6, - "learning_rate": 0.0006937590711175617, - "loss": 7.0686, - "step": 718 - }, - { - "epoch": 0.6, - "learning_rate": 0.0006923076923076923, - "loss": 7.026, - "step": 719 - }, - { - "epoch": 0.61, - "learning_rate": 0.000690856313497823, - "loss": 6.9705, - "step": 720 - }, - { - "epoch": 0.61, - "learning_rate": 0.0006894049346879536, - "loss": 6.8694, - "step": 721 - }, - { - "epoch": 0.61, - "learning_rate": 0.0006879535558780842, - "loss": 6.8761, - "step": 722 - }, - { - "epoch": 0.61, - "learning_rate": 0.0006865021770682149, - "loss": 6.946, - "step": 723 - }, - { - "epoch": 0.61, - "learning_rate": 0.0006850507982583455, - "loss": 6.9311, - "step": 724 - }, - { - "epoch": 0.61, - "learning_rate": 0.0006835994194484761, - "loss": 7.0254, - "step": 725 - }, - { - "epoch": 0.61, - "learning_rate": 0.0006821480406386067, - "loss": 6.9334, - "step": 726 - }, - { - "epoch": 0.61, - "learning_rate": 0.0006806966618287374, - "loss": 6.908, - "step": 727 - }, - { - "epoch": 0.61, - "learning_rate": 0.0006792452830188679, - "loss": 7.0937, - "step": 728 - }, - { - "epoch": 0.61, - "learning_rate": 0.0006777939042089986, - "loss": 6.972, - "step": 729 - }, - { - "epoch": 0.61, - "learning_rate": 0.0006763425253991292, - "loss": 7.0818, - "step": 730 - }, - { - "epoch": 0.61, - "learning_rate": 0.0006748911465892598, - "loss": 6.9788, - "step": 731 - }, - { - "epoch": 0.62, - "learning_rate": 0.0006734397677793904, - "loss": 7.0501, - "step": 732 - }, - { - "epoch": 0.62, - "learning_rate": 0.0006719883889695211, - "loss": 7.0429, - "step": 733 - }, - { - "epoch": 0.62, - "learning_rate": 0.0006705370101596517, - "loss": 6.9314, - "step": 734 - }, - { - "epoch": 0.62, - "learning_rate": 0.0006690856313497823, - "loss": 7.05, - "step": 735 - }, - { - "epoch": 0.62, - "learning_rate": 0.000667634252539913, - "loss": 6.9731, - "step": 736 - }, - { - "epoch": 0.62, - "learning_rate": 0.0006661828737300436, - "loss": 7.1809, - "step": 737 - }, - { - "epoch": 0.62, - "learning_rate": 0.0006647314949201742, - "loss": 7.1111, - "step": 738 - }, - { - "epoch": 0.62, - "learning_rate": 0.0006632801161103048, - "loss": 7.1377, - "step": 739 - }, - { - "epoch": 0.62, - "learning_rate": 0.0006618287373004355, - "loss": 6.9592, - "step": 740 - }, - { - "epoch": 0.62, - "learning_rate": 0.000660377358490566, - "loss": 7.1619, - "step": 741 - }, - { - "epoch": 0.62, - "learning_rate": 0.0006589259796806967, - "loss": 7.0776, - "step": 742 - }, - { - "epoch": 0.62, - "learning_rate": 0.0006574746008708273, - "loss": 7.1051, - "step": 743 - }, - { - "epoch": 0.63, - "learning_rate": 0.0006560232220609579, - "loss": 6.9187, - "step": 744 - }, - { - "epoch": 0.63, - "learning_rate": 0.0006545718432510886, - "loss": 6.9202, - "step": 745 - }, - { - "epoch": 0.63, - "learning_rate": 0.0006531204644412192, - "loss": 6.7895, - "step": 746 - }, - { - "epoch": 0.63, - "learning_rate": 0.0006516690856313498, - "loss": 6.958, - "step": 747 - }, - { - "epoch": 0.63, - "learning_rate": 0.0006502177068214804, - "loss": 7.14, - "step": 748 - }, - { - "epoch": 0.63, - "learning_rate": 0.0006487663280116111, - "loss": 6.7628, - "step": 749 - }, - { - "epoch": 0.63, - "learning_rate": 0.0006473149492017417, - "loss": 6.4782, - "step": 750 - }, - { - "epoch": 0.63, - "learning_rate": 0.0006458635703918723, - "loss": 7.0834, - "step": 751 - }, - { - "epoch": 0.63, - "learning_rate": 0.0006444121915820029, - "loss": 7.1919, - "step": 752 - }, - { - "epoch": 0.63, - "learning_rate": 0.0006429608127721336, - "loss": 7.0338, - "step": 753 - }, - { - "epoch": 0.63, - "learning_rate": 0.0006415094339622641, - "loss": 6.9988, - "step": 754 - }, - { - "epoch": 0.63, - "learning_rate": 0.0006400580551523948, - "loss": 7.2172, - "step": 755 - }, - { - "epoch": 0.64, - "learning_rate": 0.0006386066763425255, - "loss": 7.0916, - "step": 756 - }, - { - "epoch": 0.64, - "learning_rate": 0.000637155297532656, - "loss": 7.0179, - "step": 757 - }, - { - "epoch": 0.64, - "learning_rate": 0.0006357039187227867, - "loss": 7.1129, - "step": 758 - }, - { - "epoch": 0.64, - "learning_rate": 0.0006342525399129173, - "loss": 6.8909, - "step": 759 - }, - { - "epoch": 0.64, - "learning_rate": 0.0006328011611030479, - "loss": 7.0784, - "step": 760 - }, - { - "epoch": 0.64, - "learning_rate": 0.0006313497822931785, - "loss": 7.0147, - "step": 761 - }, - { - "epoch": 0.64, - "learning_rate": 0.0006298984034833092, - "loss": 6.9088, - "step": 762 - }, - { - "epoch": 0.64, - "learning_rate": 0.0006284470246734397, - "loss": 6.913, - "step": 763 - }, - { - "epoch": 0.64, - "learning_rate": 0.0006269956458635704, - "loss": 6.9156, - "step": 764 - }, - { - "epoch": 0.64, - "learning_rate": 0.000625544267053701, - "loss": 7.2477, - "step": 765 - }, - { - "epoch": 0.64, - "learning_rate": 0.0006240928882438317, - "loss": 7.0455, - "step": 766 - }, - { - "epoch": 0.64, - "learning_rate": 0.0006226415094339623, - "loss": 7.0375, - "step": 767 - }, - { - "epoch": 0.65, - "learning_rate": 0.0006211901306240929, - "loss": 6.835, - "step": 768 - }, - { - "epoch": 0.65, - "learning_rate": 0.0006197387518142236, - "loss": 6.9587, - "step": 769 - }, - { - "epoch": 0.65, - "learning_rate": 0.0006182873730043541, - "loss": 7.0557, - "step": 770 - }, - { - "epoch": 0.65, - "learning_rate": 0.0006168359941944848, - "loss": 7.0017, - "step": 771 - }, - { - "epoch": 0.65, - "learning_rate": 0.0006153846153846154, - "loss": 7.1627, - "step": 772 - }, - { - "epoch": 0.65, - "learning_rate": 0.000613933236574746, - "loss": 7.0343, - "step": 773 - }, - { - "epoch": 0.65, - "learning_rate": 0.0006124818577648766, - "loss": 6.8341, - "step": 774 - }, - { - "epoch": 0.65, - "learning_rate": 0.0006110304789550073, - "loss": 7.0343, - "step": 775 - }, - { - "epoch": 0.65, - "learning_rate": 0.0006095791001451378, - "loss": 6.9326, - "step": 776 - }, - { - "epoch": 0.65, - "learning_rate": 0.0006081277213352685, - "loss": 6.8837, - "step": 777 - }, - { - "epoch": 0.65, - "learning_rate": 0.0006066763425253992, - "loss": 6.9924, - "step": 778 - }, - { - "epoch": 0.65, - "learning_rate": 0.0006052249637155298, - "loss": 7.0209, - "step": 779 - }, - { - "epoch": 0.66, - "learning_rate": 0.0006037735849056604, - "loss": 7.2092, - "step": 780 - }, - { - "epoch": 0.66, - "learning_rate": 0.000602322206095791, - "loss": 7.0569, - "step": 781 - }, - { - "epoch": 0.66, - "learning_rate": 0.0006008708272859217, - "loss": 6.9737, - "step": 782 - }, - { - "epoch": 0.66, - "learning_rate": 0.0005994194484760522, - "loss": 6.8599, - "step": 783 - }, - { - "epoch": 0.66, - "learning_rate": 0.0005979680696661829, - "loss": 7.0745, - "step": 784 - }, - { - "epoch": 0.66, - "learning_rate": 0.0005965166908563135, - "loss": 7.158, - "step": 785 - }, - { - "epoch": 0.66, - "learning_rate": 0.0005950653120464441, - "loss": 6.8415, - "step": 786 - }, - { - "epoch": 0.66, - "learning_rate": 0.0005936139332365747, - "loss": 7.1236, - "step": 787 - }, - { - "epoch": 0.66, - "learning_rate": 0.0005921625544267054, - "loss": 6.9651, - "step": 788 - }, - { - "epoch": 0.66, - "learning_rate": 0.0005907111756168359, - "loss": 6.9907, - "step": 789 - }, - { - "epoch": 0.66, - "learning_rate": 0.0005892597968069666, - "loss": 6.9473, - "step": 790 - }, - { - "epoch": 0.66, - "learning_rate": 0.0005878084179970973, - "loss": 7.1449, - "step": 791 - }, - { - "epoch": 0.67, - "learning_rate": 0.0005863570391872278, - "loss": 6.9635, - "step": 792 - }, - { - "epoch": 0.67, - "learning_rate": 0.0005849056603773585, - "loss": 6.9782, - "step": 793 - }, - { - "epoch": 0.67, - "learning_rate": 0.0005834542815674891, - "loss": 6.8888, - "step": 794 - }, - { - "epoch": 0.67, - "learning_rate": 0.0005820029027576198, - "loss": 6.8093, - "step": 795 - }, - { - "epoch": 0.67, - "learning_rate": 0.0005805515239477503, - "loss": 6.708, - "step": 796 - }, - { - "epoch": 0.67, - "learning_rate": 0.000579100145137881, - "loss": 6.7831, - "step": 797 - }, - { - "epoch": 0.67, - "learning_rate": 0.0005776487663280116, - "loss": 6.8203, - "step": 798 - }, - { - "epoch": 0.67, - "learning_rate": 0.0005761973875181422, - "loss": 6.7705, - "step": 799 - }, - { - "epoch": 0.67, - "learning_rate": 0.0005747460087082728, - "loss": 6.4364, - "step": 800 - }, - { - "epoch": 0.67, - "learning_rate": 0.0005732946298984035, - "loss": 7.09, - "step": 801 - }, - { - "epoch": 0.67, - "learning_rate": 0.0005718432510885341, - "loss": 6.9635, - "step": 802 - }, - { - "epoch": 0.68, - "learning_rate": 0.0005703918722786647, - "loss": 6.9889, - "step": 803 - }, - { - "epoch": 0.68, - "learning_rate": 0.0005689404934687954, - "loss": 7.0255, - "step": 804 - }, - { - "epoch": 0.68, - "learning_rate": 0.0005674891146589259, - "loss": 7.0305, - "step": 805 - }, - { - "epoch": 0.68, - "learning_rate": 0.0005660377358490566, - "loss": 6.9456, - "step": 806 - }, - { - "epoch": 0.68, - "learning_rate": 0.0005645863570391872, - "loss": 6.9681, - "step": 807 - }, - { - "epoch": 0.68, - "learning_rate": 0.0005631349782293179, - "loss": 7.0481, - "step": 808 - }, - { - "epoch": 0.68, - "learning_rate": 0.0005616835994194484, - "loss": 6.9558, - "step": 809 - }, - { - "epoch": 0.68, - "learning_rate": 0.0005602322206095791, - "loss": 7.0219, - "step": 810 - }, - { - "epoch": 0.68, - "learning_rate": 0.0005587808417997097, - "loss": 6.9424, - "step": 811 - }, - { - "epoch": 0.68, - "learning_rate": 0.0005573294629898403, - "loss": 6.9673, - "step": 812 - }, - { - "epoch": 0.68, - "learning_rate": 0.000555878084179971, - "loss": 6.8765, - "step": 813 - }, - { - "epoch": 0.68, - "learning_rate": 0.0005544267053701016, - "loss": 7.1636, - "step": 814 - }, - { - "epoch": 0.69, - "learning_rate": 0.0005529753265602322, - "loss": 7.1162, - "step": 815 - }, - { - "epoch": 0.69, - "learning_rate": 0.0005515239477503628, - "loss": 6.8999, - "step": 816 - }, - { - "epoch": 0.69, - "learning_rate": 0.0005500725689404935, - "loss": 7.0049, - "step": 817 - }, - { - "epoch": 0.69, - "learning_rate": 0.000548621190130624, - "loss": 6.8031, - "step": 818 - }, - { - "epoch": 0.69, - "learning_rate": 0.0005471698113207547, - "loss": 7.0143, - "step": 819 - }, - { - "epoch": 0.69, - "learning_rate": 0.0005457184325108853, - "loss": 7.0933, - "step": 820 - }, - { - "epoch": 0.69, - "learning_rate": 0.0005442670537010159, - "loss": 6.9953, - "step": 821 - }, - { - "epoch": 0.69, - "learning_rate": 0.0005428156748911465, - "loss": 6.9575, - "step": 822 - }, - { - "epoch": 0.69, - "learning_rate": 0.0005413642960812772, - "loss": 7.0271, - "step": 823 - }, - { - "epoch": 0.69, - "learning_rate": 0.000539912917271408, - "loss": 7.1252, - "step": 824 - }, - { - "epoch": 0.69, - "learning_rate": 0.0005384615384615384, - "loss": 6.9604, - "step": 825 - }, - { - "epoch": 0.69, - "learning_rate": 0.0005370101596516691, - "loss": 6.9373, - "step": 826 - }, - { - "epoch": 0.7, - "learning_rate": 0.0005355587808417997, - "loss": 7.0669, - "step": 827 - }, - { - "epoch": 0.7, - "learning_rate": 0.0005341074020319303, - "loss": 7.0643, - "step": 828 - }, - { - "epoch": 0.7, - "learning_rate": 0.0005326560232220609, - "loss": 7.0197, - "step": 829 - }, - { - "epoch": 0.7, - "learning_rate": 0.0005312046444121916, - "loss": 7.1988, - "step": 830 - }, - { - "epoch": 0.7, - "learning_rate": 0.0005297532656023221, - "loss": 7.0213, - "step": 831 - }, - { - "epoch": 0.7, - "learning_rate": 0.0005283018867924528, - "loss": 6.7809, - "step": 832 - }, - { - "epoch": 0.7, - "learning_rate": 0.0005268505079825834, - "loss": 6.8629, - "step": 833 - }, - { - "epoch": 0.7, - "learning_rate": 0.000525399129172714, - "loss": 7.0814, - "step": 834 - }, - { - "epoch": 0.7, - "learning_rate": 0.0005239477503628448, - "loss": 7.019, - "step": 835 - }, - { - "epoch": 0.7, - "learning_rate": 0.0005224963715529754, - "loss": 7.1263, - "step": 836 - }, - { - "epoch": 0.7, - "learning_rate": 0.0005210449927431061, - "loss": 7.0374, - "step": 837 - }, - { - "epoch": 0.7, - "learning_rate": 0.0005195936139332365, - "loss": 6.7339, - "step": 838 - }, - { - "epoch": 0.71, - "learning_rate": 0.0005181422351233673, - "loss": 7.1797, - "step": 839 - }, - { - "epoch": 0.71, - "learning_rate": 0.0005166908563134979, - "loss": 7.0079, - "step": 840 - }, - { - "epoch": 0.71, - "learning_rate": 0.0005152394775036285, - "loss": 7.0764, - "step": 841 - }, - { - "epoch": 0.71, - "learning_rate": 0.000513788098693759, - "loss": 6.9529, - "step": 842 - }, - { - "epoch": 0.71, - "learning_rate": 0.0005123367198838898, - "loss": 7.1791, - "step": 843 - }, - { - "epoch": 0.71, - "learning_rate": 0.0005108853410740202, - "loss": 6.8321, - "step": 844 - }, - { - "epoch": 0.71, - "learning_rate": 0.000509433962264151, - "loss": 6.9381, - "step": 845 - }, - { - "epoch": 0.71, - "learning_rate": 0.0005079825834542817, - "loss": 6.8807, - "step": 846 - }, - { - "epoch": 0.71, - "learning_rate": 0.0005065312046444122, - "loss": 6.9491, - "step": 847 - }, - { - "epoch": 0.71, - "learning_rate": 0.0005050798258345429, - "loss": 6.6411, - "step": 848 - }, - { - "epoch": 0.71, - "learning_rate": 0.0005036284470246735, - "loss": 6.981, - "step": 849 - }, - { - "epoch": 0.71, - "learning_rate": 0.0005021770682148041, - "loss": 6.9311, - "step": 850 - }, - { - "epoch": 0.72, - "learning_rate": 0.0005007256894049347, - "loss": 7.2351, - "step": 851 - }, - { - "epoch": 0.72, - "learning_rate": 0.0004992743105950654, - "loss": 7.0825, - "step": 852 - }, - { - "epoch": 0.72, - "learning_rate": 0.000497822931785196, - "loss": 7.0775, - "step": 853 - }, - { - "epoch": 0.72, - "learning_rate": 0.0004963715529753266, - "loss": 7.1195, - "step": 854 - }, - { - "epoch": 0.72, - "learning_rate": 0.0004949201741654572, - "loss": 7.0512, - "step": 855 - }, - { - "epoch": 0.72, - "learning_rate": 0.0004934687953555878, - "loss": 7.0764, - "step": 856 - }, - { - "epoch": 0.72, - "learning_rate": 0.0004920174165457185, - "loss": 6.9077, - "step": 857 - }, - { - "epoch": 0.72, - "learning_rate": 0.0004905660377358491, - "loss": 7.0929, - "step": 858 - }, - { - "epoch": 0.72, - "learning_rate": 0.0004891146589259797, - "loss": 6.9194, - "step": 859 - }, - { - "epoch": 0.72, - "learning_rate": 0.0004876632801161103, - "loss": 6.9869, - "step": 860 - }, - { - "epoch": 0.72, - "learning_rate": 0.0004862119013062409, - "loss": 7.0333, - "step": 861 - }, - { - "epoch": 0.72, - "learning_rate": 0.0004847605224963715, - "loss": 7.0011, - "step": 862 - }, - { - "epoch": 0.73, - "learning_rate": 0.0004833091436865022, - "loss": 6.908, - "step": 863 - }, - { - "epoch": 0.73, - "learning_rate": 0.0004818577648766328, - "loss": 7.0157, - "step": 864 - }, - { - "epoch": 0.73, - "learning_rate": 0.00048040638606676347, - "loss": 7.031, - "step": 865 - }, - { - "epoch": 0.73, - "learning_rate": 0.00047895500725689407, - "loss": 7.0082, - "step": 866 - }, - { - "epoch": 0.73, - "learning_rate": 0.00047750362844702467, - "loss": 6.8794, - "step": 867 - }, - { - "epoch": 0.73, - "learning_rate": 0.0004760522496371553, - "loss": 6.9123, - "step": 868 - }, - { - "epoch": 0.73, - "learning_rate": 0.0004746008708272859, - "loss": 6.9084, - "step": 869 - }, - { - "epoch": 0.73, - "learning_rate": 0.0004731494920174166, - "loss": 6.8418, - "step": 870 - }, - { - "epoch": 0.73, - "learning_rate": 0.0004716981132075472, - "loss": 6.9268, - "step": 871 - }, - { - "epoch": 0.73, - "learning_rate": 0.00047024673439767777, - "loss": 6.8352, - "step": 872 - }, - { - "epoch": 0.73, - "learning_rate": 0.0004687953555878084, - "loss": 7.1646, - "step": 873 - }, - { - "epoch": 0.73, - "learning_rate": 0.000467343976777939, - "loss": 7.0545, - "step": 874 - }, - { - "epoch": 0.74, - "learning_rate": 0.0004658925979680697, - "loss": 7.0751, - "step": 875 - }, - { - "epoch": 0.74, - "learning_rate": 0.00046444121915820033, - "loss": 6.9282, - "step": 876 - }, - { - "epoch": 0.74, - "learning_rate": 0.00046298984034833093, - "loss": 6.8993, - "step": 877 - }, - { - "epoch": 0.74, - "learning_rate": 0.0004615384615384616, - "loss": 7.1428, - "step": 878 - }, - { - "epoch": 0.74, - "learning_rate": 0.0004600870827285922, - "loss": 6.9436, - "step": 879 - }, - { - "epoch": 0.74, - "learning_rate": 0.0004586357039187228, - "loss": 7.0862, - "step": 880 - }, - { - "epoch": 0.74, - "learning_rate": 0.00045718432510885343, - "loss": 6.9531, - "step": 881 - }, - { - "epoch": 0.74, - "learning_rate": 0.00045573294629898403, - "loss": 7.0204, - "step": 882 - }, - { - "epoch": 0.74, - "learning_rate": 0.00045428156748911463, - "loss": 7.0929, - "step": 883 - }, - { - "epoch": 0.74, - "learning_rate": 0.0004528301886792453, - "loss": 7.102, - "step": 884 - }, - { - "epoch": 0.74, - "learning_rate": 0.0004513788098693759, - "loss": 6.8697, - "step": 885 - }, - { - "epoch": 0.74, - "learning_rate": 0.0004499274310595066, - "loss": 7.0026, - "step": 886 - }, - { - "epoch": 0.75, - "learning_rate": 0.0004484760522496372, - "loss": 7.031, - "step": 887 - }, - { - "epoch": 0.75, - "learning_rate": 0.0004470246734397678, - "loss": 6.9972, - "step": 888 - }, - { - "epoch": 0.75, - "learning_rate": 0.00044557329462989844, - "loss": 7.0602, - "step": 889 - }, - { - "epoch": 0.75, - "learning_rate": 0.00044412191582002904, - "loss": 6.9149, - "step": 890 - }, - { - "epoch": 0.75, - "learning_rate": 0.00044267053701015964, - "loss": 7.1231, - "step": 891 - }, - { - "epoch": 0.75, - "learning_rate": 0.0004412191582002903, - "loss": 7.0089, - "step": 892 - }, - { - "epoch": 0.75, - "learning_rate": 0.0004397677793904209, - "loss": 6.9217, - "step": 893 - }, - { - "epoch": 0.75, - "learning_rate": 0.00043831640058055154, - "loss": 6.8635, - "step": 894 - }, - { - "epoch": 0.75, - "learning_rate": 0.00043686502177068214, - "loss": 6.8953, - "step": 895 - }, - { - "epoch": 0.75, - "learning_rate": 0.00043541364296081274, - "loss": 6.8953, - "step": 896 - }, - { - "epoch": 0.75, - "learning_rate": 0.00043396226415094345, - "loss": 6.7635, - "step": 897 - }, - { - "epoch": 0.75, - "learning_rate": 0.00043251088534107404, - "loss": 6.5849, - "step": 898 - }, - { - "epoch": 0.76, - "learning_rate": 0.00043105950653120464, - "loss": 6.7803, - "step": 899 - }, - { - "epoch": 0.76, - "learning_rate": 0.0004296081277213353, - "loss": 6.3055, - "step": 900 - }, - { - "epoch": 0.76, - "learning_rate": 0.0004281567489114659, - "loss": 7.1556, - "step": 901 - }, - { - "epoch": 0.76, - "learning_rate": 0.00042670537010159655, - "loss": 7.1128, - "step": 902 - }, - { - "epoch": 0.76, - "learning_rate": 0.00042525399129172715, - "loss": 7.0628, - "step": 903 - }, - { - "epoch": 0.76, - "learning_rate": 0.00042380261248185774, - "loss": 6.8395, - "step": 904 - }, - { - "epoch": 0.76, - "learning_rate": 0.0004223512336719884, - "loss": 7.0728, - "step": 905 - }, - { - "epoch": 0.76, - "learning_rate": 0.000420899854862119, - "loss": 7.0456, - "step": 906 - }, - { - "epoch": 0.76, - "learning_rate": 0.00041944847605224965, - "loss": 6.9815, - "step": 907 - }, - { - "epoch": 0.76, - "learning_rate": 0.0004179970972423803, - "loss": 7.3142, - "step": 908 - }, - { - "epoch": 0.76, - "learning_rate": 0.0004165457184325109, - "loss": 6.9824, - "step": 909 - }, - { - "epoch": 0.77, - "learning_rate": 0.00041509433962264155, - "loss": 7.0251, - "step": 910 - }, - { - "epoch": 0.77, - "learning_rate": 0.00041364296081277215, - "loss": 6.957, - "step": 911 - }, - { - "epoch": 0.77, - "learning_rate": 0.00041219158200290275, - "loss": 7.011, - "step": 912 - }, - { - "epoch": 0.77, - "learning_rate": 0.0004107402031930334, - "loss": 6.9846, - "step": 913 - }, - { - "epoch": 0.77, - "learning_rate": 0.000409288824383164, - "loss": 6.9361, - "step": 914 - }, - { - "epoch": 0.77, - "learning_rate": 0.00040783744557329466, - "loss": 7.012, - "step": 915 - }, - { - "epoch": 0.77, - "learning_rate": 0.00040638606676342525, - "loss": 7.0764, - "step": 916 - }, - { - "epoch": 0.77, - "learning_rate": 0.00040493468795355585, - "loss": 7.0074, - "step": 917 - }, - { - "epoch": 0.77, - "learning_rate": 0.0004034833091436865, - "loss": 6.9982, - "step": 918 - }, - { - "epoch": 0.77, - "learning_rate": 0.0004020319303338171, - "loss": 7.0356, - "step": 919 - }, - { - "epoch": 0.77, - "learning_rate": 0.00040058055152394776, - "loss": 7.024, - "step": 920 - }, - { - "epoch": 0.77, - "learning_rate": 0.0003991291727140784, - "loss": 6.9512, - "step": 921 - }, - { - "epoch": 0.78, - "learning_rate": 0.000397677793904209, - "loss": 7.1374, - "step": 922 - }, - { - "epoch": 0.78, - "learning_rate": 0.00039622641509433966, - "loss": 7.0076, - "step": 923 - }, - { - "epoch": 0.78, - "learning_rate": 0.00039477503628447026, - "loss": 7.0693, - "step": 924 - }, - { - "epoch": 0.78, - "learning_rate": 0.00039332365747460086, - "loss": 6.8384, - "step": 925 - }, - { - "epoch": 0.78, - "learning_rate": 0.0003918722786647315, - "loss": 6.993, - "step": 926 - }, - { - "epoch": 0.78, - "learning_rate": 0.0003904208998548621, - "loss": 7.0704, - "step": 927 - }, - { - "epoch": 0.78, - "learning_rate": 0.0003889695210449927, - "loss": 7.0208, - "step": 928 - }, - { - "epoch": 0.78, - "learning_rate": 0.00038751814223512336, - "loss": 6.9332, - "step": 929 - }, - { - "epoch": 0.78, - "learning_rate": 0.00038606676342525396, - "loss": 6.9932, - "step": 930 - }, - { - "epoch": 0.78, - "learning_rate": 0.00038461538461538467, - "loss": 6.9625, - "step": 931 - }, - { - "epoch": 0.78, - "learning_rate": 0.00038316400580551527, - "loss": 7.0556, - "step": 932 - }, - { - "epoch": 0.78, - "learning_rate": 0.00038171262699564587, - "loss": 7.1128, - "step": 933 - }, - { - "epoch": 0.79, - "learning_rate": 0.0003802612481857765, - "loss": 6.9821, - "step": 934 - }, - { - "epoch": 0.79, - "learning_rate": 0.0003788098693759071, - "loss": 6.9837, - "step": 935 - }, - { - "epoch": 0.79, - "learning_rate": 0.00037735849056603777, - "loss": 7.0444, - "step": 936 - }, - { - "epoch": 0.79, - "learning_rate": 0.00037590711175616837, - "loss": 6.9861, - "step": 937 - }, - { - "epoch": 0.79, - "learning_rate": 0.00037445573294629897, - "loss": 7.1477, - "step": 938 - }, - { - "epoch": 0.79, - "learning_rate": 0.0003730043541364296, - "loss": 7.0205, - "step": 939 - }, - { - "epoch": 0.79, - "learning_rate": 0.0003715529753265602, - "loss": 6.9423, - "step": 940 - }, - { - "epoch": 0.79, - "learning_rate": 0.0003701015965166908, - "loss": 6.9051, - "step": 941 - }, - { - "epoch": 0.79, - "learning_rate": 0.0003686502177068215, - "loss": 7.1341, - "step": 942 - }, - { - "epoch": 0.79, - "learning_rate": 0.0003671988388969521, - "loss": 6.7609, - "step": 943 - }, - { - "epoch": 0.79, - "learning_rate": 0.0003657474600870828, - "loss": 6.6581, - "step": 944 - }, - { - "epoch": 0.79, - "learning_rate": 0.0003642960812772134, - "loss": 7.0643, - "step": 945 - }, - { - "epoch": 0.8, - "learning_rate": 0.000362844702467344, - "loss": 6.8123, - "step": 946 - }, - { - "epoch": 0.8, - "learning_rate": 0.00036139332365747463, - "loss": 6.6869, - "step": 947 - }, - { - "epoch": 0.8, - "learning_rate": 0.00035994194484760523, - "loss": 6.6426, - "step": 948 - }, - { - "epoch": 0.8, - "learning_rate": 0.0003584905660377358, - "loss": 6.6798, - "step": 949 - }, - { - "epoch": 0.8, - "learning_rate": 0.0003570391872278665, - "loss": 6.6849, - "step": 950 - }, - { - "epoch": 0.8, - "learning_rate": 0.0003555878084179971, - "loss": 7.1134, - "step": 951 - }, - { - "epoch": 0.8, - "learning_rate": 0.00035413642960812773, - "loss": 7.0725, - "step": 952 - }, - { - "epoch": 0.8, - "learning_rate": 0.00035268505079825833, - "loss": 6.9904, - "step": 953 - }, - { - "epoch": 0.8, - "learning_rate": 0.000351233671988389, - "loss": 6.8351, - "step": 954 - }, - { - "epoch": 0.8, - "learning_rate": 0.00034978229317851964, - "loss": 6.963, - "step": 955 - }, - { - "epoch": 0.8, - "learning_rate": 0.00034833091436865023, - "loss": 7.1159, - "step": 956 - }, - { - "epoch": 0.8, - "learning_rate": 0.00034687953555878083, - "loss": 6.9889, - "step": 957 - }, - { - "epoch": 0.81, - "learning_rate": 0.0003454281567489115, - "loss": 7.1882, - "step": 958 - }, - { - "epoch": 0.81, - "learning_rate": 0.0003439767779390421, - "loss": 6.9888, - "step": 959 - }, - { - "epoch": 0.81, - "learning_rate": 0.00034252539912917274, - "loss": 6.9362, - "step": 960 - }, - { - "epoch": 0.81, - "learning_rate": 0.00034107402031930334, - "loss": 6.976, - "step": 961 - }, - { - "epoch": 0.81, - "learning_rate": 0.00033962264150943393, - "loss": 6.9001, - "step": 962 - }, - { - "epoch": 0.81, - "learning_rate": 0.0003381712626995646, - "loss": 6.8253, - "step": 963 - }, - { - "epoch": 0.81, - "learning_rate": 0.0003367198838896952, - "loss": 6.9968, - "step": 964 - }, - { - "epoch": 0.81, - "learning_rate": 0.00033526850507982584, - "loss": 7.0632, - "step": 965 - }, - { - "epoch": 0.81, - "learning_rate": 0.0003338171262699565, - "loss": 6.9839, - "step": 966 - }, - { - "epoch": 0.81, - "learning_rate": 0.0003323657474600871, - "loss": 7.2008, - "step": 967 - }, - { - "epoch": 0.81, - "learning_rate": 0.00033091436865021774, - "loss": 6.9924, - "step": 968 - }, - { - "epoch": 0.81, - "learning_rate": 0.00032946298984034834, - "loss": 6.7617, - "step": 969 - }, - { - "epoch": 0.82, - "learning_rate": 0.00032801161103047894, - "loss": 6.9878, - "step": 970 - }, - { - "epoch": 0.82, - "learning_rate": 0.0003265602322206096, - "loss": 6.9036, - "step": 971 - }, - { - "epoch": 0.82, - "learning_rate": 0.0003251088534107402, - "loss": 6.8653, - "step": 972 - }, - { - "epoch": 0.82, - "learning_rate": 0.00032365747460087085, - "loss": 6.937, - "step": 973 - }, - { - "epoch": 0.82, - "learning_rate": 0.00032220609579100144, - "loss": 6.9683, - "step": 974 - }, - { - "epoch": 0.82, - "learning_rate": 0.00032075471698113204, - "loss": 7.0596, - "step": 975 - }, - { - "epoch": 0.82, - "learning_rate": 0.00031930333817126275, - "loss": 7.0847, - "step": 976 - }, - { - "epoch": 0.82, - "learning_rate": 0.00031785195936139335, - "loss": 6.9152, - "step": 977 - }, - { - "epoch": 0.82, - "learning_rate": 0.00031640058055152395, - "loss": 6.9945, - "step": 978 - }, - { - "epoch": 0.82, - "learning_rate": 0.0003149492017416546, - "loss": 6.9241, - "step": 979 - }, - { - "epoch": 0.82, - "learning_rate": 0.0003134978229317852, - "loss": 6.9992, - "step": 980 - }, - { - "epoch": 0.82, - "learning_rate": 0.00031204644412191585, - "loss": 7.2573, - "step": 981 - }, - { - "epoch": 0.83, - "learning_rate": 0.00031059506531204645, - "loss": 6.9963, - "step": 982 - }, - { - "epoch": 0.83, - "learning_rate": 0.00030914368650217705, - "loss": 7.021, - "step": 983 - }, - { - "epoch": 0.83, - "learning_rate": 0.0003076923076923077, - "loss": 7.1762, - "step": 984 - }, - { - "epoch": 0.83, - "learning_rate": 0.0003062409288824383, - "loss": 6.9364, - "step": 985 - }, - { - "epoch": 0.83, - "learning_rate": 0.0003047895500725689, - "loss": 6.957, - "step": 986 - }, - { - "epoch": 0.83, - "learning_rate": 0.0003033381712626996, - "loss": 7.0227, - "step": 987 - }, - { - "epoch": 0.83, - "learning_rate": 0.0003018867924528302, - "loss": 6.9943, - "step": 988 - }, - { - "epoch": 0.83, - "learning_rate": 0.00030043541364296086, - "loss": 6.9493, - "step": 989 - }, - { - "epoch": 0.83, - "learning_rate": 0.00029898403483309146, - "loss": 6.9182, - "step": 990 - }, - { - "epoch": 0.83, - "learning_rate": 0.00029753265602322206, - "loss": 6.9994, - "step": 991 - }, - { - "epoch": 0.83, - "learning_rate": 0.0002960812772133527, - "loss": 6.9722, - "step": 992 - }, - { - "epoch": 0.83, - "learning_rate": 0.0002946298984034833, - "loss": 6.9006, - "step": 993 - }, - { - "epoch": 0.84, - "learning_rate": 0.0002931785195936139, - "loss": 7.0978, - "step": 994 - }, - { - "epoch": 0.84, - "learning_rate": 0.00029172714078374456, - "loss": 7.015, - "step": 995 - }, - { - "epoch": 0.84, - "learning_rate": 0.00029027576197387516, - "loss": 7.2474, - "step": 996 - }, - { - "epoch": 0.84, - "learning_rate": 0.0002888243831640058, - "loss": 6.9951, - "step": 997 - }, - { - "epoch": 0.84, - "learning_rate": 0.0002873730043541364, - "loss": 6.7982, - "step": 998 - }, - { - "epoch": 0.84, - "learning_rate": 0.00028592162554426706, - "loss": 6.8156, - "step": 999 - }, - { - "epoch": 0.84, - "learning_rate": 0.0002844702467343977, - "loss": 6.6049, - "step": 1000 - }, - { - "epoch": 0.84, - "eval_loss": 6.955667972564697, - "eval_runtime": 758.0162, - "eval_samples_per_second": 3.485, - "eval_steps_per_second": 0.292, - "eval_wer": 2.0097029585094353, - "step": 1000 - }, - { - "epoch": 0.84, - "learning_rate": 0.0002830188679245283, - "loss": 6.9633, - "step": 1001 - }, - { - "epoch": 0.84, - "learning_rate": 0.00028156748911465897, - "loss": 7.0516, - "step": 1002 - }, - { - "epoch": 0.84, - "learning_rate": 0.00028011611030478957, - "loss": 7.019, - "step": 1003 - }, - { - "epoch": 0.84, - "learning_rate": 0.00027866473149492017, - "loss": 7.0578, - "step": 1004 - }, - { - "epoch": 0.84, - "learning_rate": 0.0002772133526850508, - "loss": 6.9281, - "step": 1005 - }, - { - "epoch": 0.85, - "learning_rate": 0.0002757619738751814, - "loss": 7.0005, - "step": 1006 - }, - { - "epoch": 0.85, - "learning_rate": 0.000274310595065312, - "loss": 6.9486, - "step": 1007 - }, - { - "epoch": 0.85, - "learning_rate": 0.00027285921625544267, - "loss": 6.8204, - "step": 1008 - }, - { - "epoch": 0.85, - "learning_rate": 0.00027140783744557327, - "loss": 7.0691, - "step": 1009 - }, - { - "epoch": 0.85, - "learning_rate": 0.000269956458635704, - "loss": 6.8514, - "step": 1010 - }, - { - "epoch": 0.85, - "learning_rate": 0.0002685050798258346, - "loss": 6.9034, - "step": 1011 - }, - { - "epoch": 0.85, - "learning_rate": 0.00026705370101596517, - "loss": 6.9737, - "step": 1012 - }, - { - "epoch": 0.85, - "learning_rate": 0.0002656023222060958, - "loss": 6.8929, - "step": 1013 - }, - { - "epoch": 0.85, - "learning_rate": 0.0002641509433962264, - "loss": 6.8821, - "step": 1014 - }, - { - "epoch": 0.85, - "learning_rate": 0.000262699564586357, - "loss": 6.9074, - "step": 1015 - }, - { - "epoch": 0.85, - "learning_rate": 0.0002612481857764877, - "loss": 7.0795, - "step": 1016 - }, - { - "epoch": 0.85, - "learning_rate": 0.0002597968069666183, - "loss": 6.8741, - "step": 1017 - }, - { - "epoch": 0.86, - "learning_rate": 0.0002583454281567489, - "loss": 6.925, - "step": 1018 - }, - { - "epoch": 0.86, - "learning_rate": 0.0002568940493468795, - "loss": 7.0645, - "step": 1019 - }, - { - "epoch": 0.86, - "learning_rate": 0.0002554426705370101, - "loss": 6.8357, - "step": 1020 - }, - { - "epoch": 0.86, - "learning_rate": 0.00025399129172714083, - "loss": 7.0222, - "step": 1021 - }, - { - "epoch": 0.86, - "learning_rate": 0.00025253991291727143, - "loss": 7.099, - "step": 1022 - }, - { - "epoch": 0.86, - "learning_rate": 0.00025108853410740203, - "loss": 7.1473, - "step": 1023 - }, - { - "epoch": 0.86, - "learning_rate": 0.0002496371552975327, - "loss": 6.9464, - "step": 1024 - }, - { - "epoch": 0.86, - "learning_rate": 0.0002481857764876633, - "loss": 6.8569, - "step": 1025 - }, - { - "epoch": 0.86, - "learning_rate": 0.0002467343976777939, - "loss": 6.8892, - "step": 1026 - }, - { - "epoch": 0.86, - "learning_rate": 0.00024528301886792453, - "loss": 6.9002, - "step": 1027 - }, - { - "epoch": 0.86, - "learning_rate": 0.00024383164005805516, - "loss": 6.9339, - "step": 1028 - }, - { - "epoch": 0.87, - "learning_rate": 0.00024238026124818576, - "loss": 6.966, - "step": 1029 - }, - { - "epoch": 0.87, - "learning_rate": 0.0002409288824383164, - "loss": 6.9522, - "step": 1030 - }, - { - "epoch": 0.87, - "learning_rate": 0.00023947750362844704, - "loss": 7.0329, - "step": 1031 - }, - { - "epoch": 0.87, - "learning_rate": 0.00023802612481857766, - "loss": 6.8151, - "step": 1032 - }, - { - "epoch": 0.87, - "learning_rate": 0.0002365747460087083, - "loss": 7.0338, - "step": 1033 - }, - { - "epoch": 0.87, - "learning_rate": 0.00023512336719883889, - "loss": 6.9186, - "step": 1034 - }, - { - "epoch": 0.87, - "learning_rate": 0.0002336719883889695, - "loss": 6.9835, - "step": 1035 - }, - { - "epoch": 0.87, - "learning_rate": 0.00023222060957910016, - "loss": 7.1005, - "step": 1036 - }, - { - "epoch": 0.87, - "learning_rate": 0.0002307692307692308, - "loss": 6.8181, - "step": 1037 - }, - { - "epoch": 0.87, - "learning_rate": 0.0002293178519593614, - "loss": 7.0983, - "step": 1038 - }, - { - "epoch": 0.87, - "learning_rate": 0.00022786647314949202, - "loss": 6.9616, - "step": 1039 - }, - { - "epoch": 0.87, - "learning_rate": 0.00022641509433962264, - "loss": 6.8104, - "step": 1040 - }, - { - "epoch": 0.88, - "learning_rate": 0.0002249637155297533, - "loss": 6.9079, - "step": 1041 - }, - { - "epoch": 0.88, - "learning_rate": 0.0002235123367198839, - "loss": 6.8955, - "step": 1042 - }, - { - "epoch": 0.88, - "learning_rate": 0.00022206095791001452, - "loss": 6.9046, - "step": 1043 - }, - { - "epoch": 0.88, - "learning_rate": 0.00022060957910014514, - "loss": 6.9043, - "step": 1044 - }, - { - "epoch": 0.88, - "learning_rate": 0.00021915820029027577, - "loss": 6.9388, - "step": 1045 - }, - { - "epoch": 0.88, - "learning_rate": 0.00021770682148040637, - "loss": 6.9023, - "step": 1046 - }, - { - "epoch": 0.88, - "learning_rate": 0.00021625544267053702, - "loss": 6.7615, - "step": 1047 - }, - { - "epoch": 0.88, - "learning_rate": 0.00021480406386066765, - "loss": 7.0216, - "step": 1048 - }, - { - "epoch": 0.88, - "learning_rate": 0.00021335268505079827, - "loss": 7.0565, - "step": 1049 - }, - { - "epoch": 0.88, - "learning_rate": 0.00021190130624092887, - "loss": 6.6157, - "step": 1050 - }, - { - "epoch": 0.88, - "learning_rate": 0.0002104499274310595, - "loss": 7.0615, - "step": 1051 - }, - { - "epoch": 0.88, - "learning_rate": 0.00020899854862119015, - "loss": 6.931, - "step": 1052 - }, - { - "epoch": 0.89, - "learning_rate": 0.00020754716981132078, - "loss": 6.9521, - "step": 1053 - }, - { - "epoch": 0.89, - "learning_rate": 0.00020609579100145138, - "loss": 7.0459, - "step": 1054 - }, - { - "epoch": 0.89, - "learning_rate": 0.000204644412191582, - "loss": 7.0397, - "step": 1055 - }, - { - "epoch": 0.89, - "learning_rate": 0.00020319303338171263, - "loss": 7.0696, - "step": 1056 - }, - { - "epoch": 0.89, - "learning_rate": 0.00020174165457184325, - "loss": 6.9833, - "step": 1057 - }, - { - "epoch": 0.89, - "learning_rate": 0.00020029027576197388, - "loss": 6.9062, - "step": 1058 - }, - { - "epoch": 0.89, - "learning_rate": 0.0001988388969521045, - "loss": 7.0576, - "step": 1059 - }, - { - "epoch": 0.89, - "learning_rate": 0.00019738751814223513, - "loss": 7.0124, - "step": 1060 - }, - { - "epoch": 0.89, - "learning_rate": 0.00019593613933236576, - "loss": 7.039, - "step": 1061 - }, - { - "epoch": 0.89, - "learning_rate": 0.00019448476052249636, - "loss": 6.9322, - "step": 1062 - }, - { - "epoch": 0.89, - "learning_rate": 0.00019303338171262698, - "loss": 7.0308, - "step": 1063 - }, - { - "epoch": 0.89, - "learning_rate": 0.00019158200290275763, - "loss": 6.8833, - "step": 1064 - }, - { - "epoch": 0.9, - "learning_rate": 0.00019013062409288826, - "loss": 6.9581, - "step": 1065 - }, - { - "epoch": 0.9, - "learning_rate": 0.00018867924528301889, - "loss": 6.9187, - "step": 1066 - }, - { - "epoch": 0.9, - "learning_rate": 0.00018722786647314948, - "loss": 6.9337, - "step": 1067 - }, - { - "epoch": 0.9, - "learning_rate": 0.0001857764876632801, - "loss": 6.9806, - "step": 1068 - }, - { - "epoch": 0.9, - "learning_rate": 0.00018432510885341076, - "loss": 7.1115, - "step": 1069 - }, - { - "epoch": 0.9, - "learning_rate": 0.0001828737300435414, - "loss": 7.0317, - "step": 1070 - }, - { - "epoch": 0.9, - "learning_rate": 0.000181422351233672, - "loss": 6.8814, - "step": 1071 - }, - { - "epoch": 0.9, - "learning_rate": 0.00017997097242380261, - "loss": 7.0167, - "step": 1072 - }, - { - "epoch": 0.9, - "learning_rate": 0.00017851959361393324, - "loss": 6.7661, - "step": 1073 - }, - { - "epoch": 0.9, - "learning_rate": 0.00017706821480406387, - "loss": 6.9122, - "step": 1074 - }, - { - "epoch": 0.9, - "learning_rate": 0.0001756168359941945, - "loss": 6.6888, - "step": 1075 - }, - { - "epoch": 0.9, - "learning_rate": 0.00017416545718432512, - "loss": 7.0122, - "step": 1076 - }, - { - "epoch": 0.91, - "learning_rate": 0.00017271407837445574, - "loss": 7.1287, - "step": 1077 - }, - { - "epoch": 0.91, - "learning_rate": 0.00017126269956458637, - "loss": 7.0498, - "step": 1078 - }, - { - "epoch": 0.91, - "learning_rate": 0.00016981132075471697, - "loss": 6.8909, - "step": 1079 - }, - { - "epoch": 0.91, - "learning_rate": 0.0001683599419448476, - "loss": 6.9292, - "step": 1080 - }, - { - "epoch": 0.91, - "learning_rate": 0.00016690856313497825, - "loss": 6.9645, - "step": 1081 - }, - { - "epoch": 0.91, - "learning_rate": 0.00016545718432510887, - "loss": 7.2057, - "step": 1082 - }, - { - "epoch": 0.91, - "learning_rate": 0.00016400580551523947, - "loss": 6.8021, - "step": 1083 - }, - { - "epoch": 0.91, - "learning_rate": 0.0001625544267053701, - "loss": 6.8902, - "step": 1084 - }, - { - "epoch": 0.91, - "learning_rate": 0.00016110304789550072, - "loss": 6.936, - "step": 1085 - }, - { - "epoch": 0.91, - "learning_rate": 0.00015965166908563138, - "loss": 6.961, - "step": 1086 - }, - { - "epoch": 0.91, - "learning_rate": 0.00015820029027576197, - "loss": 6.933, - "step": 1087 - }, - { - "epoch": 0.91, - "learning_rate": 0.0001567489114658926, - "loss": 6.9899, - "step": 1088 - }, - { - "epoch": 0.92, - "learning_rate": 0.00015529753265602323, - "loss": 6.9847, - "step": 1089 - }, - { - "epoch": 0.92, - "learning_rate": 0.00015384615384615385, - "loss": 6.8568, - "step": 1090 - }, - { - "epoch": 0.92, - "learning_rate": 0.00015239477503628445, - "loss": 6.9409, - "step": 1091 - }, - { - "epoch": 0.92, - "learning_rate": 0.0001509433962264151, - "loss": 6.8721, - "step": 1092 - }, - { - "epoch": 0.92, - "learning_rate": 0.00014949201741654573, - "loss": 6.8431, - "step": 1093 - }, { "epoch": 0.92, - "learning_rate": 0.00014804063860667635, - "loss": 6.9966, - "step": 1094 - }, - { - "epoch": 0.92, - "learning_rate": 0.00014658925979680695, - "loss": 6.812, - "step": 1095 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 545 }, { "epoch": 0.92, - "learning_rate": 0.00014513788098693758, - "loss": 7.0126, - "step": 1096 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 546 }, { "epoch": 0.92, - "learning_rate": 0.0001436865021770682, - "loss": 6.8241, - "step": 1097 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 547 }, { "epoch": 0.92, - "learning_rate": 0.00014223512336719886, - "loss": 7.0255, - "step": 1098 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 548 }, { "epoch": 0.92, - "learning_rate": 0.00014078374455732948, - "loss": 7.0514, - "step": 1099 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 549 }, { "epoch": 0.92, - "learning_rate": 0.00013933236574746008, - "loss": 6.2709, - "step": 1100 - }, - { - "epoch": 0.93, - "learning_rate": 0.0001378809869375907, - "loss": 6.9508, - "step": 1101 - }, - { - "epoch": 0.93, - "learning_rate": 0.00013642960812772133, - "loss": 7.0238, - "step": 1102 - }, - { - "epoch": 0.93, - "learning_rate": 0.000134978229317852, - "loss": 7.0654, - "step": 1103 - }, - { - "epoch": 0.93, - "learning_rate": 0.00013352685050798259, - "loss": 6.9235, - "step": 1104 - }, - { - "epoch": 0.93, - "learning_rate": 0.0001320754716981132, - "loss": 6.8528, - "step": 1105 - }, - { - "epoch": 0.93, - "learning_rate": 0.00013062409288824384, - "loss": 6.8695, - "step": 1106 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 550 }, { "epoch": 0.93, - "learning_rate": 0.00012917271407837446, - "loss": 7.1841, - "step": 1107 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 551 }, { "epoch": 0.93, - "learning_rate": 0.00012772133526850506, - "loss": 7.095, - "step": 1108 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 552 }, { "epoch": 0.93, - "learning_rate": 0.00012626995645863572, - "loss": 6.9615, - "step": 1109 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 553 }, { "epoch": 0.93, - "learning_rate": 0.00012481857764876634, - "loss": 6.9125, - "step": 1110 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 554 }, { "epoch": 0.93, - "learning_rate": 0.00012336719883889694, - "loss": 6.949, - "step": 1111 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 555 }, { "epoch": 0.93, - "learning_rate": 0.00012191582002902758, - "loss": 6.976, - "step": 1112 - }, - { - "epoch": 0.94, - "learning_rate": 0.0001204644412191582, - "loss": 6.8741, - "step": 1113 - }, - { - "epoch": 0.94, - "learning_rate": 0.00011901306240928883, - "loss": 6.8775, - "step": 1114 - }, - { - "epoch": 0.94, - "learning_rate": 0.00011756168359941944, - "loss": 7.0588, - "step": 1115 - }, - { - "epoch": 0.94, - "learning_rate": 0.00011611030478955008, - "loss": 7.0187, - "step": 1116 - }, - { - "epoch": 0.94, - "learning_rate": 0.0001146589259796807, - "loss": 7.0744, - "step": 1117 - }, - { - "epoch": 0.94, - "learning_rate": 0.00011320754716981132, - "loss": 7.0031, - "step": 1118 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 556 }, { "epoch": 0.94, - "learning_rate": 0.00011175616835994195, - "loss": 6.7731, - "step": 1119 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 557 }, { "epoch": 0.94, - "learning_rate": 0.00011030478955007257, - "loss": 7.0852, - "step": 1120 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 558 }, { "epoch": 0.94, - "learning_rate": 0.00010885341074020318, - "loss": 6.9801, - "step": 1121 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 559 }, { "epoch": 0.94, - "learning_rate": 0.00010740203193033382, - "loss": 7.1414, - "step": 1122 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 560 }, { "epoch": 0.94, - "learning_rate": 0.00010595065312046444, - "loss": 6.9665, - "step": 1123 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 561 }, { "epoch": 0.94, - "learning_rate": 0.00010449927431059508, - "loss": 7.0696, - "step": 1124 - }, - { - "epoch": 0.95, - "learning_rate": 0.00010304789550072569, - "loss": 7.1454, - "step": 1125 - }, - { - "epoch": 0.95, - "learning_rate": 0.00010159651669085631, - "loss": 6.9548, - "step": 1126 - }, - { - "epoch": 0.95, - "learning_rate": 0.00010014513788098694, - "loss": 7.0, - "step": 1127 - }, - { - "epoch": 0.95, - "learning_rate": 9.869375907111757e-05, - "loss": 6.9365, - "step": 1128 - }, - { - "epoch": 0.95, - "learning_rate": 9.724238026124818e-05, - "loss": 7.0163, - "step": 1129 - }, - { - "epoch": 0.95, - "learning_rate": 9.579100145137882e-05, - "loss": 6.9703, - "step": 1130 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 562 }, { "epoch": 0.95, - "learning_rate": 9.433962264150944e-05, - "loss": 6.8396, - "step": 1131 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 563 }, { "epoch": 0.95, - "learning_rate": 9.288824383164006e-05, - "loss": 6.8502, - "step": 1132 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 564 }, { "epoch": 0.95, - "learning_rate": 9.14368650217707e-05, - "loss": 7.0567, - "step": 1133 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 565 }, { "epoch": 0.95, - "learning_rate": 8.998548621190131e-05, - "loss": 6.9886, - "step": 1134 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 566 }, { "epoch": 0.95, - "learning_rate": 8.853410740203193e-05, - "loss": 7.099, - "step": 1135 - }, - { - "epoch": 0.96, - "learning_rate": 8.708272859216256e-05, - "loss": 6.8939, - "step": 1136 - }, - { - "epoch": 0.96, - "learning_rate": 8.563134978229318e-05, - "loss": 6.9841, - "step": 1137 - }, - { - "epoch": 0.96, - "learning_rate": 8.41799709724238e-05, - "loss": 6.9891, - "step": 1138 - }, - { - "epoch": 0.96, - "learning_rate": 8.272859216255444e-05, - "loss": 6.9023, - "step": 1139 - }, - { - "epoch": 0.96, - "learning_rate": 8.127721335268505e-05, - "loss": 6.805, - "step": 1140 - }, - { - "epoch": 0.96, - "learning_rate": 7.982583454281569e-05, - "loss": 7.0568, - "step": 1141 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 567 }, { "epoch": 0.96, - "learning_rate": 7.83744557329463e-05, - "loss": 6.983, - "step": 1142 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 568 }, { "epoch": 0.96, - "learning_rate": 7.692307692307693e-05, - "loss": 6.9293, - "step": 1143 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 569 }, { "epoch": 0.96, - "learning_rate": 7.547169811320755e-05, - "loss": 7.0366, - "step": 1144 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 570 }, { "epoch": 0.96, - "learning_rate": 7.402031930333818e-05, - "loss": 6.7063, - "step": 1145 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 571 }, { "epoch": 0.96, - "learning_rate": 7.256894049346879e-05, - "loss": 7.1163, - "step": 1146 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 572 }, { "epoch": 0.96, - "learning_rate": 7.111756168359943e-05, - "loss": 6.7976, - "step": 1147 - }, - { - "epoch": 0.97, - "learning_rate": 6.966618287373004e-05, - "loss": 6.6924, - "step": 1148 - }, - { - "epoch": 0.97, - "learning_rate": 6.821480406386067e-05, - "loss": 6.8346, - "step": 1149 - }, - { - "epoch": 0.97, - "learning_rate": 6.676342525399129e-05, - "loss": 6.6107, - "step": 1150 - }, - { - "epoch": 0.97, - "learning_rate": 6.531204644412192e-05, - "loss": 7.0302, - "step": 1151 - }, - { - "epoch": 0.97, - "learning_rate": 6.386066763425253e-05, - "loss": 6.8901, - "step": 1152 - }, - { - "epoch": 0.97, - "learning_rate": 6.240928882438317e-05, - "loss": 7.083, - "step": 1153 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 573 }, { "epoch": 0.97, - "learning_rate": 6.095791001451379e-05, - "loss": 6.977, - "step": 1154 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 574 }, { "epoch": 0.97, - "learning_rate": 5.9506531204644415e-05, - "loss": 6.9382, - "step": 1155 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 575 }, { "epoch": 0.97, - "learning_rate": 5.805515239477504e-05, - "loss": 7.1438, - "step": 1156 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 576 }, { "epoch": 0.97, - "learning_rate": 5.660377358490566e-05, - "loss": 6.9432, - "step": 1157 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 577 }, { "epoch": 0.97, - "learning_rate": 5.5152394775036286e-05, - "loss": 6.8917, - "step": 1158 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 578 }, { "epoch": 0.97, - "learning_rate": 5.370101596516691e-05, - "loss": 6.9139, - "step": 1159 - }, - { - "epoch": 0.98, - "learning_rate": 5.224963715529754e-05, - "loss": 6.9881, - "step": 1160 - }, - { - "epoch": 0.98, - "learning_rate": 5.079825834542816e-05, - "loss": 7.1474, - "step": 1161 - }, - { - "epoch": 0.98, - "learning_rate": 4.934687953555878e-05, - "loss": 7.0312, - "step": 1162 - }, - { - "epoch": 0.98, - "learning_rate": 4.789550072568941e-05, - "loss": 7.0814, - "step": 1163 - }, - { - "epoch": 0.98, - "learning_rate": 4.644412191582003e-05, - "loss": 6.9351, - "step": 1164 - }, - { - "epoch": 0.98, - "learning_rate": 4.4992743105950653e-05, - "loss": 6.9329, - "step": 1165 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 579 }, { "epoch": 0.98, - "learning_rate": 4.354136429608128e-05, - "loss": 7.139, - "step": 1166 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 580 }, { "epoch": 0.98, - "learning_rate": 4.20899854862119e-05, - "loss": 6.7964, - "step": 1167 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 581 }, { "epoch": 0.98, - "learning_rate": 4.0638606676342524e-05, - "loss": 6.9832, - "step": 1168 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 582 }, { "epoch": 0.98, - "learning_rate": 3.918722786647315e-05, - "loss": 6.8912, - "step": 1169 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 583 }, { "epoch": 0.98, - "learning_rate": 3.7735849056603776e-05, - "loss": 6.8612, - "step": 1170 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 584 }, { "epoch": 0.98, - "learning_rate": 3.6284470246734395e-05, - "loss": 6.9285, - "step": 1171 - }, - { - "epoch": 0.99, - "learning_rate": 3.483309143686502e-05, - "loss": 6.8886, - "step": 1172 - }, - { - "epoch": 0.99, - "learning_rate": 3.3381712626995646e-05, - "loss": 7.0357, - "step": 1173 - }, - { - "epoch": 0.99, - "learning_rate": 3.1930333817126266e-05, - "loss": 7.0646, - "step": 1174 - }, - { - "epoch": 0.99, - "learning_rate": 3.0478955007256895e-05, - "loss": 6.7593, - "step": 1175 - }, - { - "epoch": 0.99, - "learning_rate": 2.902757619738752e-05, - "loss": 6.7634, - "step": 1176 - }, - { - "epoch": 0.99, - "learning_rate": 2.7576197387518143e-05, - "loss": 6.9193, - "step": 1177 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 585 }, { "epoch": 0.99, - "learning_rate": 2.612481857764877e-05, - "loss": 7.066, - "step": 1178 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 586 }, { "epoch": 0.99, - "learning_rate": 2.467343976777939e-05, - "loss": 6.7822, - "step": 1179 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 587 }, { "epoch": 0.99, - "learning_rate": 2.3222060957910014e-05, - "loss": 7.1183, - "step": 1180 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 588 }, { "epoch": 0.99, - "learning_rate": 2.177068214804064e-05, - "loss": 6.8649, - "step": 1181 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 589 }, { "epoch": 0.99, - "learning_rate": 2.0319303338171262e-05, - "loss": 7.0857, - "step": 1182 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 590 }, { "epoch": 0.99, - "learning_rate": 1.8867924528301888e-05, - "loss": 6.8863, - "step": 1183 - }, - { - "epoch": 1.0, - "learning_rate": 1.741654571843251e-05, - "loss": 6.8633, - "step": 1184 - }, - { - "epoch": 1.0, - "learning_rate": 1.5965166908563133e-05, - "loss": 6.8773, - "step": 1185 - }, - { - "epoch": 1.0, - "learning_rate": 1.451378809869376e-05, - "loss": 6.8021, - "step": 1186 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 591 }, { "epoch": 1.0, - "learning_rate": 1.3062409288824384e-05, - "loss": 6.8164, - "step": 1187 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 592 }, { "epoch": 1.0, - "learning_rate": 1.1611030478955007e-05, - "loss": 6.646, - "step": 1188 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 593 }, { "epoch": 1.0, - "learning_rate": 1.0159651669085631e-05, - "loss": 6.5812, - "step": 1189 + "learning_rate": 0.0009379999999999999, + "loss": 0.0, + "step": 594 }, { "epoch": 1.0, - "step": 1189, + "step": 594, "total_flos": 0.0, - "train_loss": 7.117300854900287, - "train_runtime": 6046.5433, - "train_samples_per_second": 4.72, - "train_steps_per_second": 0.197 + "train_loss": 6.071190901476927, + "train_runtime": 5557.8497, + "train_samples_per_second": 5.135, + "train_steps_per_second": 0.107 } ], - "max_steps": 1189, + "max_steps": 594, "num_train_epochs": 1, "total_flos": 0.0, "trial_name": null,