diff --git "a/checkpoint-3780/trainer_state.json" "b/checkpoint-3780/trainer_state.json" deleted file mode 100644--- "a/checkpoint-3780/trainer_state.json" +++ /dev/null @@ -1,22786 +0,0 @@ -{ - "best_metric": 0.40209120512008667, - "best_model_checkpoint": "./disaster-tweet-distilbert-classification/checkpoint-378", - "epoch": 10.0, - "global_step": 3780, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 1.0000000000000002e-06, - "loss": 0.7503, - "step": 1 - }, - { - "epoch": 0.01, - "learning_rate": 2.0000000000000003e-06, - "loss": 0.7789, - "step": 2 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.7344, - "step": 3 - }, - { - "epoch": 0.01, - "learning_rate": 4.000000000000001e-06, - "loss": 0.7709, - "step": 4 - }, - { - "epoch": 0.01, - "learning_rate": 5e-06, - "loss": 0.6884, - "step": 5 - }, - { - "epoch": 0.02, - "learning_rate": 6e-06, - "loss": 0.7087, - "step": 6 - }, - { - "epoch": 0.02, - "learning_rate": 7.000000000000001e-06, - "loss": 0.6655, - "step": 7 - }, - { - "epoch": 0.02, - "learning_rate": 8.000000000000001e-06, - "loss": 0.6978, - "step": 8 - }, - { - "epoch": 0.02, - "learning_rate": 9e-06, - "loss": 0.7435, - "step": 9 - }, - { - "epoch": 0.03, - "learning_rate": 1e-05, - "loss": 0.719, - "step": 10 - }, - { - "epoch": 0.03, - "learning_rate": 1.1000000000000001e-05, - "loss": 0.7129, - "step": 11 - }, - { - "epoch": 0.03, - "learning_rate": 1.2e-05, - "loss": 0.7249, - "step": 12 - }, - { - "epoch": 0.03, - "learning_rate": 1.3000000000000001e-05, - "loss": 0.7436, - "step": 13 - }, - { - "epoch": 0.04, - "learning_rate": 1.4000000000000001e-05, - "loss": 0.6886, - "step": 14 - }, - { - "epoch": 0.04, - "learning_rate": 1.5e-05, - "loss": 0.702, - "step": 15 - }, - { - "epoch": 0.04, - "learning_rate": 1.6000000000000003e-05, - "loss": 0.7105, - "step": 16 - }, - { - "epoch": 0.04, - "learning_rate": 1.7000000000000003e-05, - "loss": 0.6709, - "step": 17 - }, - { - "epoch": 0.05, - "learning_rate": 1.8e-05, - "loss": 0.6767, - "step": 18 - }, - { - "epoch": 0.05, - "learning_rate": 1.9e-05, - "loss": 0.6784, - "step": 19 - }, - { - "epoch": 0.05, - "learning_rate": 2e-05, - "loss": 0.7016, - "step": 20 - }, - { - "epoch": 0.06, - "learning_rate": 2.1e-05, - "loss": 0.6308, - "step": 21 - }, - { - "epoch": 0.06, - "learning_rate": 2.2000000000000003e-05, - "loss": 0.645, - "step": 22 - }, - { - "epoch": 0.06, - "learning_rate": 2.3000000000000003e-05, - "loss": 0.6845, - "step": 23 - }, - { - "epoch": 0.06, - "learning_rate": 2.4e-05, - "loss": 0.6891, - "step": 24 - }, - { - "epoch": 0.07, - "learning_rate": 2.5e-05, - "loss": 0.5914, - "step": 25 - }, - { - "epoch": 0.07, - "learning_rate": 2.6000000000000002e-05, - "loss": 0.6916, - "step": 26 - }, - { - "epoch": 0.07, - "learning_rate": 2.7000000000000002e-05, - "loss": 0.5884, - "step": 27 - }, - { - "epoch": 0.07, - "learning_rate": 2.8000000000000003e-05, - "loss": 0.6587, - "step": 28 - }, - { - "epoch": 0.08, - "learning_rate": 2.9e-05, - "loss": 0.6008, - "step": 29 - }, - { - "epoch": 0.08, - "learning_rate": 3e-05, - "loss": 0.6717, - "step": 30 - }, - { - "epoch": 0.08, - "learning_rate": 3.1e-05, - "loss": 0.5795, - "step": 31 - }, - { - "epoch": 0.08, - "learning_rate": 3.2000000000000005e-05, - "loss": 0.6358, - "step": 32 - }, - { - "epoch": 0.09, - "learning_rate": 3.3e-05, - "loss": 0.7508, - "step": 33 - }, - { - "epoch": 0.09, - "learning_rate": 3.4000000000000007e-05, - "loss": 0.6601, - "step": 34 - }, - { - "epoch": 0.09, - "learning_rate": 3.5e-05, - "loss": 0.6573, - "step": 35 - }, - { - "epoch": 0.1, - "learning_rate": 3.6e-05, - "loss": 0.5695, - "step": 36 - }, - { - "epoch": 0.1, - "learning_rate": 3.7e-05, - "loss": 0.5535, - "step": 37 - }, - { - "epoch": 0.1, - "learning_rate": 3.8e-05, - "loss": 0.5813, - "step": 38 - }, - { - "epoch": 0.1, - "learning_rate": 3.9000000000000006e-05, - "loss": 0.5224, - "step": 39 - }, - { - "epoch": 0.11, - "learning_rate": 4e-05, - "loss": 0.4757, - "step": 40 - }, - { - "epoch": 0.11, - "learning_rate": 4.1e-05, - "loss": 0.5529, - "step": 41 - }, - { - "epoch": 0.11, - "learning_rate": 4.2e-05, - "loss": 0.4964, - "step": 42 - }, - { - "epoch": 0.11, - "learning_rate": 4.3e-05, - "loss": 0.4565, - "step": 43 - }, - { - "epoch": 0.12, - "learning_rate": 4.4000000000000006e-05, - "loss": 0.5848, - "step": 44 - }, - { - "epoch": 0.12, - "learning_rate": 4.5e-05, - "loss": 0.7333, - "step": 45 - }, - { - "epoch": 0.12, - "learning_rate": 4.600000000000001e-05, - "loss": 0.5224, - "step": 46 - }, - { - "epoch": 0.12, - "learning_rate": 4.7e-05, - "loss": 0.4826, - "step": 47 - }, - { - "epoch": 0.13, - "learning_rate": 4.8e-05, - "loss": 0.4328, - "step": 48 - }, - { - "epoch": 0.13, - "learning_rate": 4.9e-05, - "loss": 0.2546, - "step": 49 - }, - { - "epoch": 0.13, - "learning_rate": 5e-05, - "loss": 0.2636, - "step": 50 - }, - { - "epoch": 0.13, - "learning_rate": 4.998659517426274e-05, - "loss": 0.4116, - "step": 51 - }, - { - "epoch": 0.14, - "learning_rate": 4.997319034852547e-05, - "loss": 0.3191, - "step": 52 - }, - { - "epoch": 0.14, - "learning_rate": 4.995978552278821e-05, - "loss": 0.2822, - "step": 53 - }, - { - "epoch": 0.14, - "learning_rate": 4.994638069705094e-05, - "loss": 0.8821, - "step": 54 - }, - { - "epoch": 0.15, - "learning_rate": 4.993297587131368e-05, - "loss": 0.483, - "step": 55 - }, - { - "epoch": 0.15, - "learning_rate": 4.9919571045576406e-05, - "loss": 0.7164, - "step": 56 - }, - { - "epoch": 0.15, - "learning_rate": 4.990616621983915e-05, - "loss": 0.4161, - "step": 57 - }, - { - "epoch": 0.15, - "learning_rate": 4.989276139410188e-05, - "loss": 0.2668, - "step": 58 - }, - { - "epoch": 0.16, - "learning_rate": 4.987935656836462e-05, - "loss": 0.5255, - "step": 59 - }, - { - "epoch": 0.16, - "learning_rate": 4.986595174262735e-05, - "loss": 0.3784, - "step": 60 - }, - { - "epoch": 0.16, - "learning_rate": 4.985254691689008e-05, - "loss": 0.5065, - "step": 61 - }, - { - "epoch": 0.16, - "learning_rate": 4.983914209115282e-05, - "loss": 0.1988, - "step": 62 - }, - { - "epoch": 0.17, - "learning_rate": 4.982573726541555e-05, - "loss": 0.4362, - "step": 63 - }, - { - "epoch": 0.17, - "learning_rate": 4.981233243967829e-05, - "loss": 0.6619, - "step": 64 - }, - { - "epoch": 0.17, - "learning_rate": 4.979892761394102e-05, - "loss": 0.3217, - "step": 65 - }, - { - "epoch": 0.17, - "learning_rate": 4.978552278820375e-05, - "loss": 0.2967, - "step": 66 - }, - { - "epoch": 0.18, - "learning_rate": 4.977211796246649e-05, - "loss": 0.2429, - "step": 67 - }, - { - "epoch": 0.18, - "learning_rate": 4.975871313672922e-05, - "loss": 0.6642, - "step": 68 - }, - { - "epoch": 0.18, - "learning_rate": 4.974530831099196e-05, - "loss": 0.56, - "step": 69 - }, - { - "epoch": 0.19, - "learning_rate": 4.973190348525469e-05, - "loss": 1.2979, - "step": 70 - }, - { - "epoch": 0.19, - "learning_rate": 4.9718498659517427e-05, - "loss": 0.5287, - "step": 71 - }, - { - "epoch": 0.19, - "learning_rate": 4.970509383378016e-05, - "loss": 0.4684, - "step": 72 - }, - { - "epoch": 0.19, - "learning_rate": 4.96916890080429e-05, - "loss": 0.283, - "step": 73 - }, - { - "epoch": 0.2, - "learning_rate": 4.967828418230563e-05, - "loss": 0.6818, - "step": 74 - }, - { - "epoch": 0.2, - "learning_rate": 4.966487935656837e-05, - "loss": 0.6141, - "step": 75 - }, - { - "epoch": 0.2, - "learning_rate": 4.96514745308311e-05, - "loss": 0.5046, - "step": 76 - }, - { - "epoch": 0.2, - "learning_rate": 4.963806970509384e-05, - "loss": 0.5266, - "step": 77 - }, - { - "epoch": 0.21, - "learning_rate": 4.962466487935657e-05, - "loss": 0.5944, - "step": 78 - }, - { - "epoch": 0.21, - "learning_rate": 4.961126005361931e-05, - "loss": 0.5631, - "step": 79 - }, - { - "epoch": 0.21, - "learning_rate": 4.959785522788204e-05, - "loss": 0.4791, - "step": 80 - }, - { - "epoch": 0.21, - "learning_rate": 4.958445040214477e-05, - "loss": 0.5645, - "step": 81 - }, - { - "epoch": 0.22, - "learning_rate": 4.957104557640751e-05, - "loss": 0.4349, - "step": 82 - }, - { - "epoch": 0.22, - "learning_rate": 4.955764075067024e-05, - "loss": 0.3865, - "step": 83 - }, - { - "epoch": 0.22, - "learning_rate": 4.954423592493298e-05, - "loss": 0.486, - "step": 84 - }, - { - "epoch": 0.22, - "learning_rate": 4.953083109919571e-05, - "loss": 0.2179, - "step": 85 - }, - { - "epoch": 0.23, - "learning_rate": 4.951742627345845e-05, - "loss": 0.3896, - "step": 86 - }, - { - "epoch": 0.23, - "learning_rate": 4.950402144772118e-05, - "loss": 0.4247, - "step": 87 - }, - { - "epoch": 0.23, - "learning_rate": 4.949061662198392e-05, - "loss": 0.4906, - "step": 88 - }, - { - "epoch": 0.24, - "learning_rate": 4.947721179624665e-05, - "loss": 0.4483, - "step": 89 - }, - { - "epoch": 0.24, - "learning_rate": 4.946380697050939e-05, - "loss": 0.557, - "step": 90 - }, - { - "epoch": 0.24, - "learning_rate": 4.9450402144772116e-05, - "loss": 0.7521, - "step": 91 - }, - { - "epoch": 0.24, - "learning_rate": 4.943699731903486e-05, - "loss": 0.3103, - "step": 92 - }, - { - "epoch": 0.25, - "learning_rate": 4.9423592493297586e-05, - "loss": 0.757, - "step": 93 - }, - { - "epoch": 0.25, - "learning_rate": 4.941018766756033e-05, - "loss": 0.8248, - "step": 94 - }, - { - "epoch": 0.25, - "learning_rate": 4.9396782841823056e-05, - "loss": 0.4591, - "step": 95 - }, - { - "epoch": 0.25, - "learning_rate": 4.938337801608579e-05, - "loss": 0.3912, - "step": 96 - }, - { - "epoch": 0.26, - "learning_rate": 4.9369973190348526e-05, - "loss": 0.5289, - "step": 97 - }, - { - "epoch": 0.26, - "learning_rate": 4.935656836461126e-05, - "loss": 0.3264, - "step": 98 - }, - { - "epoch": 0.26, - "learning_rate": 4.9343163538874e-05, - "loss": 0.2947, - "step": 99 - }, - { - "epoch": 0.26, - "learning_rate": 4.932975871313673e-05, - "loss": 0.2647, - "step": 100 - }, - { - "epoch": 0.27, - "learning_rate": 4.931635388739946e-05, - "loss": 0.3691, - "step": 101 - }, - { - "epoch": 0.27, - "learning_rate": 4.93029490616622e-05, - "loss": 0.4796, - "step": 102 - }, - { - "epoch": 0.27, - "learning_rate": 4.928954423592493e-05, - "loss": 0.4827, - "step": 103 - }, - { - "epoch": 0.28, - "learning_rate": 4.927613941018767e-05, - "loss": 0.2672, - "step": 104 - }, - { - "epoch": 0.28, - "learning_rate": 4.92627345844504e-05, - "loss": 0.7456, - "step": 105 - }, - { - "epoch": 0.28, - "learning_rate": 4.9249329758713136e-05, - "loss": 0.5206, - "step": 106 - }, - { - "epoch": 0.28, - "learning_rate": 4.923592493297587e-05, - "loss": 0.3576, - "step": 107 - }, - { - "epoch": 0.29, - "learning_rate": 4.9222520107238606e-05, - "loss": 0.2596, - "step": 108 - }, - { - "epoch": 0.29, - "learning_rate": 4.920911528150134e-05, - "loss": 0.4115, - "step": 109 - }, - { - "epoch": 0.29, - "learning_rate": 4.9195710455764076e-05, - "loss": 0.3481, - "step": 110 - }, - { - "epoch": 0.29, - "learning_rate": 4.918230563002681e-05, - "loss": 0.4387, - "step": 111 - }, - { - "epoch": 0.3, - "learning_rate": 4.916890080428955e-05, - "loss": 0.5023, - "step": 112 - }, - { - "epoch": 0.3, - "learning_rate": 4.915549597855228e-05, - "loss": 0.5916, - "step": 113 - }, - { - "epoch": 0.3, - "learning_rate": 4.914209115281502e-05, - "loss": 0.5467, - "step": 114 - }, - { - "epoch": 0.3, - "learning_rate": 4.912868632707775e-05, - "loss": 0.5631, - "step": 115 - }, - { - "epoch": 0.31, - "learning_rate": 4.911528150134049e-05, - "loss": 0.5512, - "step": 116 - }, - { - "epoch": 0.31, - "learning_rate": 4.910187667560322e-05, - "loss": 0.5546, - "step": 117 - }, - { - "epoch": 0.31, - "learning_rate": 4.908847184986595e-05, - "loss": 0.4209, - "step": 118 - }, - { - "epoch": 0.31, - "learning_rate": 4.907506702412869e-05, - "loss": 0.6064, - "step": 119 - }, - { - "epoch": 0.32, - "learning_rate": 4.906166219839142e-05, - "loss": 0.5301, - "step": 120 - }, - { - "epoch": 0.32, - "learning_rate": 4.904825737265416e-05, - "loss": 0.436, - "step": 121 - }, - { - "epoch": 0.32, - "learning_rate": 4.903485254691689e-05, - "loss": 0.726, - "step": 122 - }, - { - "epoch": 0.33, - "learning_rate": 4.9021447721179626e-05, - "loss": 0.5288, - "step": 123 - }, - { - "epoch": 0.33, - "learning_rate": 4.900804289544236e-05, - "loss": 0.4887, - "step": 124 - }, - { - "epoch": 0.33, - "learning_rate": 4.8994638069705097e-05, - "loss": 0.3752, - "step": 125 - }, - { - "epoch": 0.33, - "learning_rate": 4.898123324396783e-05, - "loss": 0.4895, - "step": 126 - }, - { - "epoch": 0.34, - "learning_rate": 4.896782841823057e-05, - "loss": 0.5046, - "step": 127 - }, - { - "epoch": 0.34, - "learning_rate": 4.8954423592493295e-05, - "loss": 0.3953, - "step": 128 - }, - { - "epoch": 0.34, - "learning_rate": 4.894101876675604e-05, - "loss": 0.2015, - "step": 129 - }, - { - "epoch": 0.34, - "learning_rate": 4.8927613941018765e-05, - "loss": 0.5165, - "step": 130 - }, - { - "epoch": 0.35, - "learning_rate": 4.891420911528151e-05, - "loss": 0.4237, - "step": 131 - }, - { - "epoch": 0.35, - "learning_rate": 4.8900804289544236e-05, - "loss": 0.239, - "step": 132 - }, - { - "epoch": 0.35, - "learning_rate": 4.888739946380697e-05, - "loss": 0.5515, - "step": 133 - }, - { - "epoch": 0.35, - "learning_rate": 4.8873994638069706e-05, - "loss": 0.303, - "step": 134 - }, - { - "epoch": 0.36, - "learning_rate": 4.886058981233244e-05, - "loss": 0.2867, - "step": 135 - }, - { - "epoch": 0.36, - "learning_rate": 4.8847184986595176e-05, - "loss": 0.6756, - "step": 136 - }, - { - "epoch": 0.36, - "learning_rate": 4.883378016085791e-05, - "loss": 0.4996, - "step": 137 - }, - { - "epoch": 0.37, - "learning_rate": 4.8820375335120646e-05, - "loss": 0.2798, - "step": 138 - }, - { - "epoch": 0.37, - "learning_rate": 4.880697050938338e-05, - "loss": 0.8877, - "step": 139 - }, - { - "epoch": 0.37, - "learning_rate": 4.879356568364612e-05, - "loss": 0.5022, - "step": 140 - }, - { - "epoch": 0.37, - "learning_rate": 4.878016085790885e-05, - "loss": 0.2177, - "step": 141 - }, - { - "epoch": 0.38, - "learning_rate": 4.876675603217159e-05, - "loss": 0.8708, - "step": 142 - }, - { - "epoch": 0.38, - "learning_rate": 4.8753351206434315e-05, - "loss": 0.719, - "step": 143 - }, - { - "epoch": 0.38, - "learning_rate": 4.873994638069706e-05, - "loss": 0.562, - "step": 144 - }, - { - "epoch": 0.38, - "learning_rate": 4.8726541554959786e-05, - "loss": 0.492, - "step": 145 - }, - { - "epoch": 0.39, - "learning_rate": 4.871313672922253e-05, - "loss": 0.4637, - "step": 146 - }, - { - "epoch": 0.39, - "learning_rate": 4.8699731903485256e-05, - "loss": 0.4132, - "step": 147 - }, - { - "epoch": 0.39, - "learning_rate": 4.868632707774799e-05, - "loss": 0.2889, - "step": 148 - }, - { - "epoch": 0.39, - "learning_rate": 4.8672922252010726e-05, - "loss": 0.2213, - "step": 149 - }, - { - "epoch": 0.4, - "learning_rate": 4.865951742627346e-05, - "loss": 0.9268, - "step": 150 - }, - { - "epoch": 0.4, - "learning_rate": 4.8646112600536196e-05, - "loss": 0.2852, - "step": 151 - }, - { - "epoch": 0.4, - "learning_rate": 4.863270777479893e-05, - "loss": 0.4599, - "step": 152 - }, - { - "epoch": 0.4, - "learning_rate": 4.861930294906166e-05, - "loss": 0.1913, - "step": 153 - }, - { - "epoch": 0.41, - "learning_rate": 4.86058981233244e-05, - "loss": 0.4488, - "step": 154 - }, - { - "epoch": 0.41, - "learning_rate": 4.859249329758713e-05, - "loss": 0.9022, - "step": 155 - }, - { - "epoch": 0.41, - "learning_rate": 4.857908847184987e-05, - "loss": 0.5221, - "step": 156 - }, - { - "epoch": 0.42, - "learning_rate": 4.85656836461126e-05, - "loss": 0.2394, - "step": 157 - }, - { - "epoch": 0.42, - "learning_rate": 4.8552278820375336e-05, - "loss": 0.3332, - "step": 158 - }, - { - "epoch": 0.42, - "learning_rate": 4.853887399463807e-05, - "loss": 0.4015, - "step": 159 - }, - { - "epoch": 0.42, - "learning_rate": 4.8525469168900806e-05, - "loss": 0.4461, - "step": 160 - }, - { - "epoch": 0.43, - "learning_rate": 4.851206434316354e-05, - "loss": 0.337, - "step": 161 - }, - { - "epoch": 0.43, - "learning_rate": 4.8498659517426276e-05, - "loss": 0.4908, - "step": 162 - }, - { - "epoch": 0.43, - "learning_rate": 4.848525469168901e-05, - "loss": 0.526, - "step": 163 - }, - { - "epoch": 0.43, - "learning_rate": 4.8471849865951746e-05, - "loss": 0.5262, - "step": 164 - }, - { - "epoch": 0.44, - "learning_rate": 4.845844504021448e-05, - "loss": 0.6818, - "step": 165 - }, - { - "epoch": 0.44, - "learning_rate": 4.8445040214477217e-05, - "loss": 0.3154, - "step": 166 - }, - { - "epoch": 0.44, - "learning_rate": 4.843163538873995e-05, - "loss": 0.5963, - "step": 167 - }, - { - "epoch": 0.44, - "learning_rate": 4.841823056300268e-05, - "loss": 0.4451, - "step": 168 - }, - { - "epoch": 0.45, - "learning_rate": 4.840482573726542e-05, - "loss": 0.5969, - "step": 169 - }, - { - "epoch": 0.45, - "learning_rate": 4.839142091152815e-05, - "loss": 0.438, - "step": 170 - }, - { - "epoch": 0.45, - "learning_rate": 4.837801608579089e-05, - "loss": 0.4827, - "step": 171 - }, - { - "epoch": 0.46, - "learning_rate": 4.836461126005362e-05, - "loss": 0.2029, - "step": 172 - }, - { - "epoch": 0.46, - "learning_rate": 4.8351206434316356e-05, - "loss": 0.5195, - "step": 173 - }, - { - "epoch": 0.46, - "learning_rate": 4.833780160857909e-05, - "loss": 0.517, - "step": 174 - }, - { - "epoch": 0.46, - "learning_rate": 4.8324396782841826e-05, - "loss": 0.5532, - "step": 175 - }, - { - "epoch": 0.47, - "learning_rate": 4.831099195710456e-05, - "loss": 0.4198, - "step": 176 - }, - { - "epoch": 0.47, - "learning_rate": 4.8297587131367296e-05, - "loss": 0.8386, - "step": 177 - }, - { - "epoch": 0.47, - "learning_rate": 4.8284182305630025e-05, - "loss": 0.575, - "step": 178 - }, - { - "epoch": 0.47, - "learning_rate": 4.8270777479892766e-05, - "loss": 0.6156, - "step": 179 - }, - { - "epoch": 0.48, - "learning_rate": 4.8257372654155495e-05, - "loss": 0.7044, - "step": 180 - }, - { - "epoch": 0.48, - "learning_rate": 4.824396782841824e-05, - "loss": 0.5712, - "step": 181 - }, - { - "epoch": 0.48, - "learning_rate": 4.8230563002680965e-05, - "loss": 0.34, - "step": 182 - }, - { - "epoch": 0.48, - "learning_rate": 4.82171581769437e-05, - "loss": 0.5773, - "step": 183 - }, - { - "epoch": 0.49, - "learning_rate": 4.8203753351206435e-05, - "loss": 0.546, - "step": 184 - }, - { - "epoch": 0.49, - "learning_rate": 4.819034852546917e-05, - "loss": 0.3955, - "step": 185 - }, - { - "epoch": 0.49, - "learning_rate": 4.8176943699731906e-05, - "loss": 0.5921, - "step": 186 - }, - { - "epoch": 0.49, - "learning_rate": 4.816353887399464e-05, - "loss": 0.3108, - "step": 187 - }, - { - "epoch": 0.5, - "learning_rate": 4.8150134048257376e-05, - "loss": 0.5469, - "step": 188 - }, - { - "epoch": 0.5, - "learning_rate": 4.813672922252011e-05, - "loss": 0.64, - "step": 189 - }, - { - "epoch": 0.5, - "learning_rate": 4.8123324396782846e-05, - "loss": 0.5153, - "step": 190 - }, - { - "epoch": 0.51, - "learning_rate": 4.810991957104558e-05, - "loss": 0.4719, - "step": 191 - }, - { - "epoch": 0.51, - "learning_rate": 4.8096514745308316e-05, - "loss": 0.52, - "step": 192 - }, - { - "epoch": 0.51, - "learning_rate": 4.8083109919571045e-05, - "loss": 0.5114, - "step": 193 - }, - { - "epoch": 0.51, - "learning_rate": 4.806970509383379e-05, - "loss": 0.5469, - "step": 194 - }, - { - "epoch": 0.52, - "learning_rate": 4.8056300268096515e-05, - "loss": 0.3435, - "step": 195 - }, - { - "epoch": 0.52, - "learning_rate": 4.804289544235926e-05, - "loss": 0.6469, - "step": 196 - }, - { - "epoch": 0.52, - "learning_rate": 4.8029490616621985e-05, - "loss": 0.6595, - "step": 197 - }, - { - "epoch": 0.52, - "learning_rate": 4.801608579088472e-05, - "loss": 0.5503, - "step": 198 - }, - { - "epoch": 0.53, - "learning_rate": 4.8002680965147456e-05, - "loss": 0.3799, - "step": 199 - }, - { - "epoch": 0.53, - "learning_rate": 4.798927613941019e-05, - "loss": 0.417, - "step": 200 - }, - { - "epoch": 0.53, - "learning_rate": 4.7975871313672926e-05, - "loss": 0.5281, - "step": 201 - }, - { - "epoch": 0.53, - "learning_rate": 4.796246648793566e-05, - "loss": 0.3439, - "step": 202 - }, - { - "epoch": 0.54, - "learning_rate": 4.794906166219839e-05, - "loss": 0.5777, - "step": 203 - }, - { - "epoch": 0.54, - "learning_rate": 4.793565683646113e-05, - "loss": 0.5286, - "step": 204 - }, - { - "epoch": 0.54, - "learning_rate": 4.792225201072386e-05, - "loss": 0.4302, - "step": 205 - }, - { - "epoch": 0.54, - "learning_rate": 4.79088471849866e-05, - "loss": 0.5413, - "step": 206 - }, - { - "epoch": 0.55, - "learning_rate": 4.789544235924933e-05, - "loss": 0.3087, - "step": 207 - }, - { - "epoch": 0.55, - "learning_rate": 4.7882037533512065e-05, - "loss": 0.4385, - "step": 208 - }, - { - "epoch": 0.55, - "learning_rate": 4.78686327077748e-05, - "loss": 0.7137, - "step": 209 - }, - { - "epoch": 0.56, - "learning_rate": 4.7855227882037535e-05, - "loss": 0.6278, - "step": 210 - }, - { - "epoch": 0.56, - "learning_rate": 4.784182305630027e-05, - "loss": 0.2832, - "step": 211 - }, - { - "epoch": 0.56, - "learning_rate": 4.7828418230563005e-05, - "loss": 0.5899, - "step": 212 - }, - { - "epoch": 0.56, - "learning_rate": 4.7815013404825734e-05, - "loss": 0.4421, - "step": 213 - }, - { - "epoch": 0.57, - "learning_rate": 4.7801608579088476e-05, - "loss": 0.3411, - "step": 214 - }, - { - "epoch": 0.57, - "learning_rate": 4.7788203753351204e-05, - "loss": 0.4236, - "step": 215 - }, - { - "epoch": 0.57, - "learning_rate": 4.7774798927613946e-05, - "loss": 0.4444, - "step": 216 - }, - { - "epoch": 0.57, - "learning_rate": 4.7761394101876674e-05, - "loss": 0.4765, - "step": 217 - }, - { - "epoch": 0.58, - "learning_rate": 4.774798927613941e-05, - "loss": 0.2453, - "step": 218 - }, - { - "epoch": 0.58, - "learning_rate": 4.7734584450402145e-05, - "loss": 0.2765, - "step": 219 - }, - { - "epoch": 0.58, - "learning_rate": 4.772117962466488e-05, - "loss": 0.2075, - "step": 220 - }, - { - "epoch": 0.58, - "learning_rate": 4.7707774798927615e-05, - "loss": 0.5905, - "step": 221 - }, - { - "epoch": 0.59, - "learning_rate": 4.769436997319035e-05, - "loss": 0.3457, - "step": 222 - }, - { - "epoch": 0.59, - "learning_rate": 4.7680965147453085e-05, - "loss": 0.5986, - "step": 223 - }, - { - "epoch": 0.59, - "learning_rate": 4.766756032171582e-05, - "loss": 0.3881, - "step": 224 - }, - { - "epoch": 0.6, - "learning_rate": 4.7654155495978555e-05, - "loss": 0.5655, - "step": 225 - }, - { - "epoch": 0.6, - "learning_rate": 4.764075067024129e-05, - "loss": 0.6156, - "step": 226 - }, - { - "epoch": 0.6, - "learning_rate": 4.7627345844504026e-05, - "loss": 0.4784, - "step": 227 - }, - { - "epoch": 0.6, - "learning_rate": 4.7613941018766754e-05, - "loss": 0.5195, - "step": 228 - }, - { - "epoch": 0.61, - "learning_rate": 4.7600536193029496e-05, - "loss": 0.66, - "step": 229 - }, - { - "epoch": 0.61, - "learning_rate": 4.7587131367292224e-05, - "loss": 0.4104, - "step": 230 - }, - { - "epoch": 0.61, - "learning_rate": 4.7573726541554966e-05, - "loss": 0.3478, - "step": 231 - }, - { - "epoch": 0.61, - "learning_rate": 4.7560321715817695e-05, - "loss": 0.2037, - "step": 232 - }, - { - "epoch": 0.62, - "learning_rate": 4.754691689008043e-05, - "loss": 0.75, - "step": 233 - }, - { - "epoch": 0.62, - "learning_rate": 4.7533512064343165e-05, - "loss": 0.4237, - "step": 234 - }, - { - "epoch": 0.62, - "learning_rate": 4.75201072386059e-05, - "loss": 0.2372, - "step": 235 - }, - { - "epoch": 0.62, - "learning_rate": 4.7506702412868635e-05, - "loss": 0.7874, - "step": 236 - }, - { - "epoch": 0.63, - "learning_rate": 4.749329758713137e-05, - "loss": 0.5751, - "step": 237 - }, - { - "epoch": 0.63, - "learning_rate": 4.7479892761394105e-05, - "loss": 0.5801, - "step": 238 - }, - { - "epoch": 0.63, - "learning_rate": 4.746648793565684e-05, - "loss": 0.4983, - "step": 239 - }, - { - "epoch": 0.63, - "learning_rate": 4.745308310991957e-05, - "loss": 0.4215, - "step": 240 - }, - { - "epoch": 0.64, - "learning_rate": 4.743967828418231e-05, - "loss": 0.3655, - "step": 241 - }, - { - "epoch": 0.64, - "learning_rate": 4.742627345844504e-05, - "loss": 0.523, - "step": 242 - }, - { - "epoch": 0.64, - "learning_rate": 4.741286863270778e-05, - "loss": 0.5952, - "step": 243 - }, - { - "epoch": 0.65, - "learning_rate": 4.739946380697051e-05, - "loss": 0.4226, - "step": 244 - }, - { - "epoch": 0.65, - "learning_rate": 4.7386058981233244e-05, - "loss": 0.316, - "step": 245 - }, - { - "epoch": 0.65, - "learning_rate": 4.737265415549598e-05, - "loss": 0.5096, - "step": 246 - }, - { - "epoch": 0.65, - "learning_rate": 4.7359249329758715e-05, - "loss": 0.435, - "step": 247 - }, - { - "epoch": 0.66, - "learning_rate": 4.734584450402145e-05, - "loss": 0.7516, - "step": 248 - }, - { - "epoch": 0.66, - "learning_rate": 4.7332439678284185e-05, - "loss": 0.531, - "step": 249 - }, - { - "epoch": 0.66, - "learning_rate": 4.731903485254692e-05, - "loss": 0.4096, - "step": 250 - }, - { - "epoch": 0.66, - "learning_rate": 4.7305630026809655e-05, - "loss": 0.387, - "step": 251 - }, - { - "epoch": 0.67, - "learning_rate": 4.729222520107239e-05, - "loss": 0.5468, - "step": 252 - }, - { - "epoch": 0.67, - "learning_rate": 4.7278820375335125e-05, - "loss": 0.4613, - "step": 253 - }, - { - "epoch": 0.67, - "learning_rate": 4.726541554959786e-05, - "loss": 0.8437, - "step": 254 - }, - { - "epoch": 0.67, - "learning_rate": 4.725201072386059e-05, - "loss": 0.522, - "step": 255 - }, - { - "epoch": 0.68, - "learning_rate": 4.723860589812333e-05, - "loss": 0.3922, - "step": 256 - }, - { - "epoch": 0.68, - "learning_rate": 4.722520107238606e-05, - "loss": 0.5114, - "step": 257 - }, - { - "epoch": 0.68, - "learning_rate": 4.72117962466488e-05, - "loss": 0.6148, - "step": 258 - }, - { - "epoch": 0.69, - "learning_rate": 4.719839142091153e-05, - "loss": 0.4578, - "step": 259 - }, - { - "epoch": 0.69, - "learning_rate": 4.7184986595174265e-05, - "loss": 0.6286, - "step": 260 - }, - { - "epoch": 0.69, - "learning_rate": 4.7171581769437e-05, - "loss": 0.5883, - "step": 261 - }, - { - "epoch": 0.69, - "learning_rate": 4.7158176943699735e-05, - "loss": 0.5634, - "step": 262 - }, - { - "epoch": 0.7, - "learning_rate": 4.714477211796247e-05, - "loss": 0.4085, - "step": 263 - }, - { - "epoch": 0.7, - "learning_rate": 4.7131367292225205e-05, - "loss": 0.2988, - "step": 264 - }, - { - "epoch": 0.7, - "learning_rate": 4.7117962466487934e-05, - "loss": 0.6353, - "step": 265 - }, - { - "epoch": 0.7, - "learning_rate": 4.7104557640750675e-05, - "loss": 0.4598, - "step": 266 - }, - { - "epoch": 0.71, - "learning_rate": 4.7091152815013404e-05, - "loss": 0.5072, - "step": 267 - }, - { - "epoch": 0.71, - "learning_rate": 4.7077747989276146e-05, - "loss": 0.49, - "step": 268 - }, - { - "epoch": 0.71, - "learning_rate": 4.7064343163538874e-05, - "loss": 0.7225, - "step": 269 - }, - { - "epoch": 0.71, - "learning_rate": 4.705093833780161e-05, - "loss": 0.5332, - "step": 270 - }, - { - "epoch": 0.72, - "learning_rate": 4.7037533512064344e-05, - "loss": 0.6064, - "step": 271 - }, - { - "epoch": 0.72, - "learning_rate": 4.702412868632708e-05, - "loss": 0.3518, - "step": 272 - }, - { - "epoch": 0.72, - "learning_rate": 4.7010723860589815e-05, - "loss": 0.3673, - "step": 273 - }, - { - "epoch": 0.72, - "learning_rate": 4.699731903485255e-05, - "loss": 0.4688, - "step": 274 - }, - { - "epoch": 0.73, - "learning_rate": 4.6983914209115285e-05, - "loss": 0.5389, - "step": 275 - }, - { - "epoch": 0.73, - "learning_rate": 4.697050938337802e-05, - "loss": 0.374, - "step": 276 - }, - { - "epoch": 0.73, - "learning_rate": 4.6957104557640755e-05, - "loss": 0.475, - "step": 277 - }, - { - "epoch": 0.74, - "learning_rate": 4.694369973190349e-05, - "loss": 0.5397, - "step": 278 - }, - { - "epoch": 0.74, - "learning_rate": 4.6930294906166225e-05, - "loss": 0.3821, - "step": 279 - }, - { - "epoch": 0.74, - "learning_rate": 4.6916890080428954e-05, - "loss": 0.3372, - "step": 280 - }, - { - "epoch": 0.74, - "learning_rate": 4.6903485254691696e-05, - "loss": 0.6652, - "step": 281 - }, - { - "epoch": 0.75, - "learning_rate": 4.6890080428954424e-05, - "loss": 0.2894, - "step": 282 - }, - { - "epoch": 0.75, - "learning_rate": 4.6876675603217166e-05, - "loss": 0.5639, - "step": 283 - }, - { - "epoch": 0.75, - "learning_rate": 4.6863270777479894e-05, - "loss": 0.353, - "step": 284 - }, - { - "epoch": 0.75, - "learning_rate": 4.684986595174263e-05, - "loss": 0.2932, - "step": 285 - }, - { - "epoch": 0.76, - "learning_rate": 4.6836461126005364e-05, - "loss": 0.467, - "step": 286 - }, - { - "epoch": 0.76, - "learning_rate": 4.68230563002681e-05, - "loss": 0.4732, - "step": 287 - }, - { - "epoch": 0.76, - "learning_rate": 4.6809651474530835e-05, - "loss": 0.1808, - "step": 288 - }, - { - "epoch": 0.76, - "learning_rate": 4.679624664879357e-05, - "loss": 0.6031, - "step": 289 - }, - { - "epoch": 0.77, - "learning_rate": 4.67828418230563e-05, - "loss": 0.2555, - "step": 290 - }, - { - "epoch": 0.77, - "learning_rate": 4.676943699731904e-05, - "loss": 0.4041, - "step": 291 - }, - { - "epoch": 0.77, - "learning_rate": 4.675603217158177e-05, - "loss": 0.7822, - "step": 292 - }, - { - "epoch": 0.78, - "learning_rate": 4.674262734584451e-05, - "loss": 0.138, - "step": 293 - }, - { - "epoch": 0.78, - "learning_rate": 4.672922252010724e-05, - "loss": 0.2746, - "step": 294 - }, - { - "epoch": 0.78, - "learning_rate": 4.6715817694369974e-05, - "loss": 0.5835, - "step": 295 - }, - { - "epoch": 0.78, - "learning_rate": 4.670241286863271e-05, - "loss": 0.2367, - "step": 296 - }, - { - "epoch": 0.79, - "learning_rate": 4.6689008042895444e-05, - "loss": 0.3247, - "step": 297 - }, - { - "epoch": 0.79, - "learning_rate": 4.667560321715818e-05, - "loss": 0.306, - "step": 298 - }, - { - "epoch": 0.79, - "learning_rate": 4.6662198391420914e-05, - "loss": 0.2825, - "step": 299 - }, - { - "epoch": 0.79, - "learning_rate": 4.664879356568364e-05, - "loss": 0.6102, - "step": 300 - }, - { - "epoch": 0.8, - "learning_rate": 4.6635388739946385e-05, - "loss": 0.3613, - "step": 301 - }, - { - "epoch": 0.8, - "learning_rate": 4.662198391420911e-05, - "loss": 0.6327, - "step": 302 - }, - { - "epoch": 0.8, - "learning_rate": 4.6608579088471855e-05, - "loss": 0.4059, - "step": 303 - }, - { - "epoch": 0.8, - "learning_rate": 4.659517426273458e-05, - "loss": 0.4027, - "step": 304 - }, - { - "epoch": 0.81, - "learning_rate": 4.658176943699732e-05, - "loss": 0.9133, - "step": 305 - }, - { - "epoch": 0.81, - "learning_rate": 4.6568364611260054e-05, - "loss": 0.1869, - "step": 306 - }, - { - "epoch": 0.81, - "learning_rate": 4.655495978552279e-05, - "loss": 0.3987, - "step": 307 - }, - { - "epoch": 0.81, - "learning_rate": 4.6541554959785524e-05, - "loss": 0.6114, - "step": 308 - }, - { - "epoch": 0.82, - "learning_rate": 4.652815013404826e-05, - "loss": 0.4406, - "step": 309 - }, - { - "epoch": 0.82, - "learning_rate": 4.6514745308310994e-05, - "loss": 0.2954, - "step": 310 - }, - { - "epoch": 0.82, - "learning_rate": 4.650134048257373e-05, - "loss": 0.5067, - "step": 311 - }, - { - "epoch": 0.83, - "learning_rate": 4.6487935656836464e-05, - "loss": 0.3985, - "step": 312 - }, - { - "epoch": 0.83, - "learning_rate": 4.64745308310992e-05, - "loss": 0.3756, - "step": 313 - }, - { - "epoch": 0.83, - "learning_rate": 4.6461126005361935e-05, - "loss": 0.2618, - "step": 314 - }, - { - "epoch": 0.83, - "learning_rate": 4.644772117962466e-05, - "loss": 0.4992, - "step": 315 - }, - { - "epoch": 0.84, - "learning_rate": 4.6434316353887405e-05, - "loss": 0.8224, - "step": 316 - }, - { - "epoch": 0.84, - "learning_rate": 4.642091152815013e-05, - "loss": 0.3425, - "step": 317 - }, - { - "epoch": 0.84, - "learning_rate": 4.6407506702412875e-05, - "loss": 0.4062, - "step": 318 - }, - { - "epoch": 0.84, - "learning_rate": 4.6394101876675603e-05, - "loss": 0.4748, - "step": 319 - }, - { - "epoch": 0.85, - "learning_rate": 4.638069705093834e-05, - "loss": 0.6857, - "step": 320 - }, - { - "epoch": 0.85, - "learning_rate": 4.6367292225201074e-05, - "loss": 0.5368, - "step": 321 - }, - { - "epoch": 0.85, - "learning_rate": 4.635388739946381e-05, - "loss": 0.5571, - "step": 322 - }, - { - "epoch": 0.85, - "learning_rate": 4.6340482573726544e-05, - "loss": 0.3045, - "step": 323 - }, - { - "epoch": 0.86, - "learning_rate": 4.632707774798928e-05, - "loss": 0.3189, - "step": 324 - }, - { - "epoch": 0.86, - "learning_rate": 4.631367292225201e-05, - "loss": 0.3033, - "step": 325 - }, - { - "epoch": 0.86, - "learning_rate": 4.630026809651475e-05, - "loss": 0.4659, - "step": 326 - }, - { - "epoch": 0.87, - "learning_rate": 4.628686327077748e-05, - "loss": 0.4058, - "step": 327 - }, - { - "epoch": 0.87, - "learning_rate": 4.627345844504022e-05, - "loss": 0.6745, - "step": 328 - }, - { - "epoch": 0.87, - "learning_rate": 4.626005361930295e-05, - "loss": 0.3259, - "step": 329 - }, - { - "epoch": 0.87, - "learning_rate": 4.624664879356568e-05, - "loss": 0.5126, - "step": 330 - }, - { - "epoch": 0.88, - "learning_rate": 4.623324396782842e-05, - "loss": 0.2759, - "step": 331 - }, - { - "epoch": 0.88, - "learning_rate": 4.621983914209115e-05, - "loss": 0.2512, - "step": 332 - }, - { - "epoch": 0.88, - "learning_rate": 4.620643431635389e-05, - "loss": 0.3046, - "step": 333 - }, - { - "epoch": 0.88, - "learning_rate": 4.6193029490616624e-05, - "loss": 0.3931, - "step": 334 - }, - { - "epoch": 0.89, - "learning_rate": 4.617962466487936e-05, - "loss": 0.4838, - "step": 335 - }, - { - "epoch": 0.89, - "learning_rate": 4.6166219839142094e-05, - "loss": 0.2925, - "step": 336 - }, - { - "epoch": 0.89, - "learning_rate": 4.615281501340483e-05, - "loss": 0.4481, - "step": 337 - }, - { - "epoch": 0.89, - "learning_rate": 4.6139410187667564e-05, - "loss": 0.4528, - "step": 338 - }, - { - "epoch": 0.9, - "learning_rate": 4.61260053619303e-05, - "loss": 0.2934, - "step": 339 - }, - { - "epoch": 0.9, - "learning_rate": 4.611260053619303e-05, - "loss": 0.609, - "step": 340 - }, - { - "epoch": 0.9, - "learning_rate": 4.609919571045577e-05, - "loss": 0.8988, - "step": 341 - }, - { - "epoch": 0.9, - "learning_rate": 4.60857908847185e-05, - "loss": 1.1222, - "step": 342 - }, - { - "epoch": 0.91, - "learning_rate": 4.607238605898124e-05, - "loss": 0.3265, - "step": 343 - }, - { - "epoch": 0.91, - "learning_rate": 4.605898123324397e-05, - "loss": 0.4722, - "step": 344 - }, - { - "epoch": 0.91, - "learning_rate": 4.60455764075067e-05, - "loss": 0.2791, - "step": 345 - }, - { - "epoch": 0.92, - "learning_rate": 4.603217158176944e-05, - "loss": 0.4183, - "step": 346 - }, - { - "epoch": 0.92, - "learning_rate": 4.6018766756032174e-05, - "loss": 0.5323, - "step": 347 - }, - { - "epoch": 0.92, - "learning_rate": 4.600536193029491e-05, - "loss": 0.6108, - "step": 348 - }, - { - "epoch": 0.92, - "learning_rate": 4.5991957104557644e-05, - "loss": 0.2875, - "step": 349 - }, - { - "epoch": 0.93, - "learning_rate": 4.597855227882037e-05, - "loss": 0.6642, - "step": 350 - }, - { - "epoch": 0.93, - "learning_rate": 4.5965147453083114e-05, - "loss": 0.5244, - "step": 351 - }, - { - "epoch": 0.93, - "learning_rate": 4.595174262734584e-05, - "loss": 0.5562, - "step": 352 - }, - { - "epoch": 0.93, - "learning_rate": 4.5938337801608584e-05, - "loss": 0.3972, - "step": 353 - }, - { - "epoch": 0.94, - "learning_rate": 4.592493297587131e-05, - "loss": 0.4532, - "step": 354 - }, - { - "epoch": 0.94, - "learning_rate": 4.591152815013405e-05, - "loss": 0.3368, - "step": 355 - }, - { - "epoch": 0.94, - "learning_rate": 4.589812332439678e-05, - "loss": 0.171, - "step": 356 - }, - { - "epoch": 0.94, - "learning_rate": 4.588471849865952e-05, - "loss": 0.4036, - "step": 357 - }, - { - "epoch": 0.95, - "learning_rate": 4.587131367292225e-05, - "loss": 0.4305, - "step": 358 - }, - { - "epoch": 0.95, - "learning_rate": 4.585790884718499e-05, - "loss": 0.2643, - "step": 359 - }, - { - "epoch": 0.95, - "learning_rate": 4.5844504021447723e-05, - "loss": 0.3782, - "step": 360 - }, - { - "epoch": 0.96, - "learning_rate": 4.583109919571046e-05, - "loss": 0.1673, - "step": 361 - }, - { - "epoch": 0.96, - "learning_rate": 4.5817694369973194e-05, - "loss": 0.3969, - "step": 362 - }, - { - "epoch": 0.96, - "learning_rate": 4.580428954423593e-05, - "loss": 0.3249, - "step": 363 - }, - { - "epoch": 0.96, - "learning_rate": 4.5790884718498664e-05, - "loss": 0.1656, - "step": 364 - }, - { - "epoch": 0.97, - "learning_rate": 4.57774798927614e-05, - "loss": 0.4551, - "step": 365 - }, - { - "epoch": 0.97, - "learning_rate": 4.5764075067024134e-05, - "loss": 0.6075, - "step": 366 - }, - { - "epoch": 0.97, - "learning_rate": 4.575067024128686e-05, - "loss": 0.4699, - "step": 367 - }, - { - "epoch": 0.97, - "learning_rate": 4.5737265415549605e-05, - "loss": 0.5752, - "step": 368 - }, - { - "epoch": 0.98, - "learning_rate": 4.572386058981233e-05, - "loss": 0.3114, - "step": 369 - }, - { - "epoch": 0.98, - "learning_rate": 4.5710455764075075e-05, - "loss": 0.7407, - "step": 370 - }, - { - "epoch": 0.98, - "learning_rate": 4.56970509383378e-05, - "loss": 0.6427, - "step": 371 - }, - { - "epoch": 0.98, - "learning_rate": 4.568364611260054e-05, - "loss": 0.5021, - "step": 372 - }, - { - "epoch": 0.99, - "learning_rate": 4.5670241286863273e-05, - "loss": 0.4209, - "step": 373 - }, - { - "epoch": 0.99, - "learning_rate": 4.565683646112601e-05, - "loss": 0.5957, - "step": 374 - }, - { - "epoch": 0.99, - "learning_rate": 4.5643431635388744e-05, - "loss": 0.495, - "step": 375 - }, - { - "epoch": 0.99, - "learning_rate": 4.563002680965148e-05, - "loss": 0.3101, - "step": 376 - }, - { - "epoch": 1.0, - "learning_rate": 4.561662198391421e-05, - "loss": 0.472, - "step": 377 - }, - { - "epoch": 1.0, - "learning_rate": 4.560321715817695e-05, - "loss": 0.4607, - "step": 378 - }, - { - "epoch": 1.0, - "eval_f1": 0.7914963205233032, - "eval_loss": 0.40209120512008667, - "eval_runtime": 2.1585, - "eval_samples_per_second": 700.949, - "eval_steps_per_second": 44.012, - "step": 378 - }, - { - "epoch": 1.0, - "learning_rate": 4.558981233243968e-05, - "loss": 0.2422, - "step": 379 - }, - { - "epoch": 1.01, - "learning_rate": 4.557640750670242e-05, - "loss": 0.2074, - "step": 380 - }, - { - "epoch": 1.01, - "learning_rate": 4.556300268096515e-05, - "loss": 0.4469, - "step": 381 - }, - { - "epoch": 1.01, - "learning_rate": 4.554959785522788e-05, - "loss": 0.3121, - "step": 382 - }, - { - "epoch": 1.01, - "learning_rate": 4.553619302949062e-05, - "loss": 0.3634, - "step": 383 - }, - { - "epoch": 1.02, - "learning_rate": 4.552278820375335e-05, - "loss": 0.2139, - "step": 384 - }, - { - "epoch": 1.02, - "learning_rate": 4.550938337801609e-05, - "loss": 0.5273, - "step": 385 - }, - { - "epoch": 1.02, - "learning_rate": 4.549597855227882e-05, - "loss": 0.1228, - "step": 386 - }, - { - "epoch": 1.02, - "learning_rate": 4.548257372654156e-05, - "loss": 0.2459, - "step": 387 - }, - { - "epoch": 1.03, - "learning_rate": 4.5469168900804294e-05, - "loss": 0.443, - "step": 388 - }, - { - "epoch": 1.03, - "learning_rate": 4.545576407506703e-05, - "loss": 0.364, - "step": 389 - }, - { - "epoch": 1.03, - "learning_rate": 4.5442359249329764e-05, - "loss": 0.4071, - "step": 390 - }, - { - "epoch": 1.03, - "learning_rate": 4.54289544235925e-05, - "loss": 0.3876, - "step": 391 - }, - { - "epoch": 1.04, - "learning_rate": 4.541554959785523e-05, - "loss": 0.5239, - "step": 392 - }, - { - "epoch": 1.04, - "learning_rate": 4.540214477211797e-05, - "loss": 0.4548, - "step": 393 - }, - { - "epoch": 1.04, - "learning_rate": 4.53887399463807e-05, - "loss": 0.3588, - "step": 394 - }, - { - "epoch": 1.04, - "learning_rate": 4.537533512064344e-05, - "loss": 0.2693, - "step": 395 - }, - { - "epoch": 1.05, - "learning_rate": 4.536193029490617e-05, - "loss": 0.3773, - "step": 396 - }, - { - "epoch": 1.05, - "learning_rate": 4.53485254691689e-05, - "loss": 0.3982, - "step": 397 - }, - { - "epoch": 1.05, - "learning_rate": 4.533512064343164e-05, - "loss": 0.2074, - "step": 398 - }, - { - "epoch": 1.06, - "learning_rate": 4.532171581769437e-05, - "loss": 0.3322, - "step": 399 - }, - { - "epoch": 1.06, - "learning_rate": 4.530831099195711e-05, - "loss": 0.118, - "step": 400 - }, - { - "epoch": 1.06, - "learning_rate": 4.5294906166219844e-05, - "loss": 0.6216, - "step": 401 - }, - { - "epoch": 1.06, - "learning_rate": 4.528150134048257e-05, - "loss": 0.4028, - "step": 402 - }, - { - "epoch": 1.07, - "learning_rate": 4.5268096514745314e-05, - "loss": 0.3179, - "step": 403 - }, - { - "epoch": 1.07, - "learning_rate": 4.525469168900804e-05, - "loss": 0.2815, - "step": 404 - }, - { - "epoch": 1.07, - "learning_rate": 4.5241286863270784e-05, - "loss": 0.2366, - "step": 405 - }, - { - "epoch": 1.07, - "learning_rate": 4.522788203753351e-05, - "loss": 0.14, - "step": 406 - }, - { - "epoch": 1.08, - "learning_rate": 4.521447721179625e-05, - "loss": 0.1255, - "step": 407 - }, - { - "epoch": 1.08, - "learning_rate": 4.520107238605898e-05, - "loss": 0.1482, - "step": 408 - }, - { - "epoch": 1.08, - "learning_rate": 4.518766756032172e-05, - "loss": 0.665, - "step": 409 - }, - { - "epoch": 1.08, - "learning_rate": 4.517426273458445e-05, - "loss": 0.2535, - "step": 410 - }, - { - "epoch": 1.09, - "learning_rate": 4.516085790884719e-05, - "loss": 0.1716, - "step": 411 - }, - { - "epoch": 1.09, - "learning_rate": 4.5147453083109916e-05, - "loss": 0.0765, - "step": 412 - }, - { - "epoch": 1.09, - "learning_rate": 4.513404825737266e-05, - "loss": 0.1238, - "step": 413 - }, - { - "epoch": 1.1, - "learning_rate": 4.512064343163539e-05, - "loss": 0.3877, - "step": 414 - }, - { - "epoch": 1.1, - "learning_rate": 4.510723860589813e-05, - "loss": 0.206, - "step": 415 - }, - { - "epoch": 1.1, - "learning_rate": 4.509383378016086e-05, - "loss": 0.1822, - "step": 416 - }, - { - "epoch": 1.1, - "learning_rate": 4.508042895442359e-05, - "loss": 0.2294, - "step": 417 - }, - { - "epoch": 1.11, - "learning_rate": 4.506702412868633e-05, - "loss": 0.7083, - "step": 418 - }, - { - "epoch": 1.11, - "learning_rate": 4.505361930294906e-05, - "loss": 0.325, - "step": 419 - }, - { - "epoch": 1.11, - "learning_rate": 4.50402144772118e-05, - "loss": 0.2378, - "step": 420 - }, - { - "epoch": 1.11, - "learning_rate": 4.502680965147453e-05, - "loss": 0.2494, - "step": 421 - }, - { - "epoch": 1.12, - "learning_rate": 4.501340482573727e-05, - "loss": 0.3812, - "step": 422 - }, - { - "epoch": 1.12, - "learning_rate": 4.5e-05, - "loss": 0.1491, - "step": 423 - }, - { - "epoch": 1.12, - "learning_rate": 4.498659517426274e-05, - "loss": 0.0833, - "step": 424 - }, - { - "epoch": 1.12, - "learning_rate": 4.497319034852547e-05, - "loss": 0.4637, - "step": 425 - }, - { - "epoch": 1.13, - "learning_rate": 4.495978552278821e-05, - "loss": 0.2594, - "step": 426 - }, - { - "epoch": 1.13, - "learning_rate": 4.4946380697050937e-05, - "loss": 0.4136, - "step": 427 - }, - { - "epoch": 1.13, - "learning_rate": 4.493297587131368e-05, - "loss": 0.4962, - "step": 428 - }, - { - "epoch": 1.13, - "learning_rate": 4.491957104557641e-05, - "loss": 0.0842, - "step": 429 - }, - { - "epoch": 1.14, - "learning_rate": 4.490616621983915e-05, - "loss": 0.4761, - "step": 430 - }, - { - "epoch": 1.14, - "learning_rate": 4.489276139410188e-05, - "loss": 0.4351, - "step": 431 - }, - { - "epoch": 1.14, - "learning_rate": 4.487935656836461e-05, - "loss": 0.4226, - "step": 432 - }, - { - "epoch": 1.15, - "learning_rate": 4.486595174262735e-05, - "loss": 0.5714, - "step": 433 - }, - { - "epoch": 1.15, - "learning_rate": 4.485254691689008e-05, - "loss": 0.2034, - "step": 434 - }, - { - "epoch": 1.15, - "learning_rate": 4.483914209115282e-05, - "loss": 0.3354, - "step": 435 - }, - { - "epoch": 1.15, - "learning_rate": 4.482573726541555e-05, - "loss": 0.7503, - "step": 436 - }, - { - "epoch": 1.16, - "learning_rate": 4.481233243967828e-05, - "loss": 0.1542, - "step": 437 - }, - { - "epoch": 1.16, - "learning_rate": 4.479892761394102e-05, - "loss": 0.3067, - "step": 438 - }, - { - "epoch": 1.16, - "learning_rate": 4.478552278820375e-05, - "loss": 0.4134, - "step": 439 - }, - { - "epoch": 1.16, - "learning_rate": 4.477211796246649e-05, - "loss": 0.1458, - "step": 440 - }, - { - "epoch": 1.17, - "learning_rate": 4.475871313672922e-05, - "loss": 0.2814, - "step": 441 - }, - { - "epoch": 1.17, - "learning_rate": 4.474530831099196e-05, - "loss": 0.1751, - "step": 442 - }, - { - "epoch": 1.17, - "learning_rate": 4.473190348525469e-05, - "loss": 0.4144, - "step": 443 - }, - { - "epoch": 1.17, - "learning_rate": 4.471849865951743e-05, - "loss": 0.3275, - "step": 444 - }, - { - "epoch": 1.18, - "learning_rate": 4.470509383378016e-05, - "loss": 0.3083, - "step": 445 - }, - { - "epoch": 1.18, - "learning_rate": 4.46916890080429e-05, - "loss": 0.4185, - "step": 446 - }, - { - "epoch": 1.18, - "learning_rate": 4.467828418230563e-05, - "loss": 0.3181, - "step": 447 - }, - { - "epoch": 1.19, - "learning_rate": 4.466487935656837e-05, - "loss": 0.438, - "step": 448 - }, - { - "epoch": 1.19, - "learning_rate": 4.46514745308311e-05, - "loss": 0.4509, - "step": 449 - }, - { - "epoch": 1.19, - "learning_rate": 4.463806970509384e-05, - "loss": 0.4597, - "step": 450 - }, - { - "epoch": 1.19, - "learning_rate": 4.462466487935657e-05, - "loss": 0.3123, - "step": 451 - }, - { - "epoch": 1.2, - "learning_rate": 4.46112600536193e-05, - "loss": 0.376, - "step": 452 - }, - { - "epoch": 1.2, - "learning_rate": 4.459785522788204e-05, - "loss": 0.2392, - "step": 453 - }, - { - "epoch": 1.2, - "learning_rate": 4.458445040214477e-05, - "loss": 0.4451, - "step": 454 - }, - { - "epoch": 1.2, - "learning_rate": 4.4571045576407513e-05, - "loss": 0.2436, - "step": 455 - }, - { - "epoch": 1.21, - "learning_rate": 4.455764075067024e-05, - "loss": 0.2944, - "step": 456 - }, - { - "epoch": 1.21, - "learning_rate": 4.454423592493298e-05, - "loss": 0.323, - "step": 457 - }, - { - "epoch": 1.21, - "learning_rate": 4.453083109919571e-05, - "loss": 0.1966, - "step": 458 - }, - { - "epoch": 1.21, - "learning_rate": 4.451742627345845e-05, - "loss": 0.273, - "step": 459 - }, - { - "epoch": 1.22, - "learning_rate": 4.450402144772118e-05, - "loss": 0.3498, - "step": 460 - }, - { - "epoch": 1.22, - "learning_rate": 4.449061662198392e-05, - "loss": 0.458, - "step": 461 - }, - { - "epoch": 1.22, - "learning_rate": 4.4477211796246646e-05, - "loss": 0.1607, - "step": 462 - }, - { - "epoch": 1.22, - "learning_rate": 4.446380697050939e-05, - "loss": 0.5712, - "step": 463 - }, - { - "epoch": 1.23, - "learning_rate": 4.4450402144772116e-05, - "loss": 0.0908, - "step": 464 - }, - { - "epoch": 1.23, - "learning_rate": 4.443699731903486e-05, - "loss": 0.2625, - "step": 465 - }, - { - "epoch": 1.23, - "learning_rate": 4.4423592493297586e-05, - "loss": 0.368, - "step": 466 - }, - { - "epoch": 1.24, - "learning_rate": 4.441018766756032e-05, - "loss": 0.5096, - "step": 467 - }, - { - "epoch": 1.24, - "learning_rate": 4.4396782841823057e-05, - "loss": 0.209, - "step": 468 - }, - { - "epoch": 1.24, - "learning_rate": 4.438337801608579e-05, - "loss": 0.5235, - "step": 469 - }, - { - "epoch": 1.24, - "learning_rate": 4.436997319034853e-05, - "loss": 0.5116, - "step": 470 - }, - { - "epoch": 1.25, - "learning_rate": 4.435656836461126e-05, - "loss": 0.49, - "step": 471 - }, - { - "epoch": 1.25, - "learning_rate": 4.4343163538874e-05, - "loss": 0.4409, - "step": 472 - }, - { - "epoch": 1.25, - "learning_rate": 4.432975871313673e-05, - "loss": 0.1324, - "step": 473 - }, - { - "epoch": 1.25, - "learning_rate": 4.431635388739947e-05, - "loss": 0.1257, - "step": 474 - }, - { - "epoch": 1.26, - "learning_rate": 4.43029490616622e-05, - "loss": 0.3844, - "step": 475 - }, - { - "epoch": 1.26, - "learning_rate": 4.428954423592494e-05, - "loss": 0.2942, - "step": 476 - }, - { - "epoch": 1.26, - "learning_rate": 4.4276139410187666e-05, - "loss": 0.316, - "step": 477 - }, - { - "epoch": 1.26, - "learning_rate": 4.426273458445041e-05, - "loss": 0.3724, - "step": 478 - }, - { - "epoch": 1.27, - "learning_rate": 4.4249329758713136e-05, - "loss": 0.4498, - "step": 479 - }, - { - "epoch": 1.27, - "learning_rate": 4.423592493297588e-05, - "loss": 0.2677, - "step": 480 - }, - { - "epoch": 1.27, - "learning_rate": 4.4222520107238607e-05, - "loss": 0.3331, - "step": 481 - }, - { - "epoch": 1.28, - "learning_rate": 4.420911528150134e-05, - "loss": 0.4375, - "step": 482 - }, - { - "epoch": 1.28, - "learning_rate": 4.419571045576408e-05, - "loss": 0.3097, - "step": 483 - }, - { - "epoch": 1.28, - "learning_rate": 4.418230563002681e-05, - "loss": 0.4438, - "step": 484 - }, - { - "epoch": 1.28, - "learning_rate": 4.416890080428955e-05, - "loss": 0.3648, - "step": 485 - }, - { - "epoch": 1.29, - "learning_rate": 4.415549597855228e-05, - "loss": 0.3023, - "step": 486 - }, - { - "epoch": 1.29, - "learning_rate": 4.414209115281502e-05, - "loss": 0.2963, - "step": 487 - }, - { - "epoch": 1.29, - "learning_rate": 4.412868632707775e-05, - "loss": 0.1749, - "step": 488 - }, - { - "epoch": 1.29, - "learning_rate": 4.411528150134048e-05, - "loss": 0.2006, - "step": 489 - }, - { - "epoch": 1.3, - "learning_rate": 4.410187667560322e-05, - "loss": 0.0923, - "step": 490 - }, - { - "epoch": 1.3, - "learning_rate": 4.408847184986595e-05, - "loss": 0.2665, - "step": 491 - }, - { - "epoch": 1.3, - "learning_rate": 4.407506702412869e-05, - "loss": 0.3378, - "step": 492 - }, - { - "epoch": 1.3, - "learning_rate": 4.406166219839142e-05, - "loss": 0.2543, - "step": 493 - }, - { - "epoch": 1.31, - "learning_rate": 4.4048257372654156e-05, - "loss": 0.2521, - "step": 494 - }, - { - "epoch": 1.31, - "learning_rate": 4.403485254691689e-05, - "loss": 0.1616, - "step": 495 - }, - { - "epoch": 1.31, - "learning_rate": 4.402144772117963e-05, - "loss": 0.1445, - "step": 496 - }, - { - "epoch": 1.31, - "learning_rate": 4.400804289544236e-05, - "loss": 0.5352, - "step": 497 - }, - { - "epoch": 1.32, - "learning_rate": 4.39946380697051e-05, - "loss": 0.0654, - "step": 498 - }, - { - "epoch": 1.32, - "learning_rate": 4.398123324396783e-05, - "loss": 0.4354, - "step": 499 - }, - { - "epoch": 1.32, - "learning_rate": 4.396782841823057e-05, - "loss": 0.3893, - "step": 500 - }, - { - "epoch": 1.33, - "learning_rate": 4.39544235924933e-05, - "loss": 0.2779, - "step": 501 - }, - { - "epoch": 1.33, - "learning_rate": 4.394101876675604e-05, - "loss": 0.3702, - "step": 502 - }, - { - "epoch": 1.33, - "learning_rate": 4.392761394101877e-05, - "loss": 0.3899, - "step": 503 - }, - { - "epoch": 1.33, - "learning_rate": 4.39142091152815e-05, - "loss": 0.3091, - "step": 504 - }, - { - "epoch": 1.34, - "learning_rate": 4.390080428954424e-05, - "loss": 0.2143, - "step": 505 - }, - { - "epoch": 1.34, - "learning_rate": 4.388739946380697e-05, - "loss": 0.2551, - "step": 506 - }, - { - "epoch": 1.34, - "learning_rate": 4.387399463806971e-05, - "loss": 0.2227, - "step": 507 - }, - { - "epoch": 1.34, - "learning_rate": 4.386058981233244e-05, - "loss": 0.1383, - "step": 508 - }, - { - "epoch": 1.35, - "learning_rate": 4.384718498659518e-05, - "loss": 0.3463, - "step": 509 - }, - { - "epoch": 1.35, - "learning_rate": 4.383378016085791e-05, - "loss": 0.1104, - "step": 510 - }, - { - "epoch": 1.35, - "learning_rate": 4.382037533512065e-05, - "loss": 0.2439, - "step": 511 - }, - { - "epoch": 1.35, - "learning_rate": 4.380697050938338e-05, - "loss": 0.0725, - "step": 512 - }, - { - "epoch": 1.36, - "learning_rate": 4.379356568364612e-05, - "loss": 0.4928, - "step": 513 - }, - { - "epoch": 1.36, - "learning_rate": 4.3780160857908846e-05, - "loss": 0.0903, - "step": 514 - }, - { - "epoch": 1.36, - "learning_rate": 4.376675603217159e-05, - "loss": 0.1808, - "step": 515 - }, - { - "epoch": 1.37, - "learning_rate": 4.3753351206434316e-05, - "loss": 0.617, - "step": 516 - }, - { - "epoch": 1.37, - "learning_rate": 4.373994638069706e-05, - "loss": 0.0333, - "step": 517 - }, - { - "epoch": 1.37, - "learning_rate": 4.3726541554959786e-05, - "loss": 0.1113, - "step": 518 - }, - { - "epoch": 1.37, - "learning_rate": 4.371313672922252e-05, - "loss": 0.4604, - "step": 519 - }, - { - "epoch": 1.38, - "learning_rate": 4.3699731903485256e-05, - "loss": 0.4267, - "step": 520 - }, - { - "epoch": 1.38, - "learning_rate": 4.368632707774799e-05, - "loss": 0.2621, - "step": 521 - }, - { - "epoch": 1.38, - "learning_rate": 4.3672922252010727e-05, - "loss": 0.3236, - "step": 522 - }, - { - "epoch": 1.38, - "learning_rate": 4.365951742627346e-05, - "loss": 0.0559, - "step": 523 - }, - { - "epoch": 1.39, - "learning_rate": 4.364611260053619e-05, - "loss": 0.0801, - "step": 524 - }, - { - "epoch": 1.39, - "learning_rate": 4.363270777479893e-05, - "loss": 0.0518, - "step": 525 - }, - { - "epoch": 1.39, - "learning_rate": 4.361930294906166e-05, - "loss": 0.0618, - "step": 526 - }, - { - "epoch": 1.39, - "learning_rate": 4.36058981233244e-05, - "loss": 0.3043, - "step": 527 - }, - { - "epoch": 1.4, - "learning_rate": 4.359249329758713e-05, - "loss": 0.4693, - "step": 528 - }, - { - "epoch": 1.4, - "learning_rate": 4.3579088471849866e-05, - "loss": 0.2978, - "step": 529 - }, - { - "epoch": 1.4, - "learning_rate": 4.35656836461126e-05, - "loss": 0.0817, - "step": 530 - }, - { - "epoch": 1.4, - "learning_rate": 4.3552278820375336e-05, - "loss": 0.0784, - "step": 531 - }, - { - "epoch": 1.41, - "learning_rate": 4.353887399463807e-05, - "loss": 0.5389, - "step": 532 - }, - { - "epoch": 1.41, - "learning_rate": 4.3525469168900806e-05, - "loss": 0.2337, - "step": 533 - }, - { - "epoch": 1.41, - "learning_rate": 4.351206434316354e-05, - "loss": 0.3189, - "step": 534 - }, - { - "epoch": 1.42, - "learning_rate": 4.3498659517426276e-05, - "loss": 0.2503, - "step": 535 - }, - { - "epoch": 1.42, - "learning_rate": 4.348525469168901e-05, - "loss": 0.333, - "step": 536 - }, - { - "epoch": 1.42, - "learning_rate": 4.347184986595175e-05, - "loss": 0.2311, - "step": 537 - }, - { - "epoch": 1.42, - "learning_rate": 4.345844504021448e-05, - "loss": 0.5196, - "step": 538 - }, - { - "epoch": 1.43, - "learning_rate": 4.344504021447721e-05, - "loss": 0.5466, - "step": 539 - }, - { - "epoch": 1.43, - "learning_rate": 4.343163538873995e-05, - "loss": 0.4296, - "step": 540 - }, - { - "epoch": 1.43, - "learning_rate": 4.341823056300268e-05, - "loss": 0.6559, - "step": 541 - }, - { - "epoch": 1.43, - "learning_rate": 4.340482573726542e-05, - "loss": 0.6134, - "step": 542 - }, - { - "epoch": 1.44, - "learning_rate": 4.339142091152815e-05, - "loss": 0.5614, - "step": 543 - }, - { - "epoch": 1.44, - "learning_rate": 4.3378016085790886e-05, - "loss": 0.3143, - "step": 544 - }, - { - "epoch": 1.44, - "learning_rate": 4.336461126005362e-05, - "loss": 0.564, - "step": 545 - }, - { - "epoch": 1.44, - "learning_rate": 4.3351206434316356e-05, - "loss": 0.2141, - "step": 546 - }, - { - "epoch": 1.45, - "learning_rate": 4.333780160857909e-05, - "loss": 0.5886, - "step": 547 - }, - { - "epoch": 1.45, - "learning_rate": 4.3324396782841826e-05, - "loss": 0.2129, - "step": 548 - }, - { - "epoch": 1.45, - "learning_rate": 4.3310991957104555e-05, - "loss": 0.2491, - "step": 549 - }, - { - "epoch": 1.46, - "learning_rate": 4.32975871313673e-05, - "loss": 0.4023, - "step": 550 - }, - { - "epoch": 1.46, - "learning_rate": 4.3284182305630025e-05, - "loss": 0.2159, - "step": 551 - }, - { - "epoch": 1.46, - "learning_rate": 4.327077747989277e-05, - "loss": 0.2975, - "step": 552 - }, - { - "epoch": 1.46, - "learning_rate": 4.3257372654155495e-05, - "loss": 0.165, - "step": 553 - }, - { - "epoch": 1.47, - "learning_rate": 4.324396782841823e-05, - "loss": 0.063, - "step": 554 - }, - { - "epoch": 1.47, - "learning_rate": 4.3230563002680966e-05, - "loss": 0.4209, - "step": 555 - }, - { - "epoch": 1.47, - "learning_rate": 4.32171581769437e-05, - "loss": 0.2704, - "step": 556 - }, - { - "epoch": 1.47, - "learning_rate": 4.3203753351206436e-05, - "loss": 0.6455, - "step": 557 - }, - { - "epoch": 1.48, - "learning_rate": 4.319034852546917e-05, - "loss": 0.3354, - "step": 558 - }, - { - "epoch": 1.48, - "learning_rate": 4.3176943699731906e-05, - "loss": 0.4069, - "step": 559 - }, - { - "epoch": 1.48, - "learning_rate": 4.316353887399464e-05, - "loss": 0.2334, - "step": 560 - }, - { - "epoch": 1.48, - "learning_rate": 4.3150134048257376e-05, - "loss": 0.4192, - "step": 561 - }, - { - "epoch": 1.49, - "learning_rate": 4.313672922252011e-05, - "loss": 0.2942, - "step": 562 - }, - { - "epoch": 1.49, - "learning_rate": 4.3123324396782847e-05, - "loss": 0.2503, - "step": 563 - }, - { - "epoch": 1.49, - "learning_rate": 4.3109919571045575e-05, - "loss": 0.15, - "step": 564 - }, - { - "epoch": 1.49, - "learning_rate": 4.309651474530832e-05, - "loss": 0.6895, - "step": 565 - }, - { - "epoch": 1.5, - "learning_rate": 4.3083109919571045e-05, - "loss": 0.2462, - "step": 566 - }, - { - "epoch": 1.5, - "learning_rate": 4.306970509383379e-05, - "loss": 0.142, - "step": 567 - }, - { - "epoch": 1.5, - "learning_rate": 4.3056300268096515e-05, - "loss": 0.4838, - "step": 568 - }, - { - "epoch": 1.51, - "learning_rate": 4.304289544235925e-05, - "loss": 0.4714, - "step": 569 - }, - { - "epoch": 1.51, - "learning_rate": 4.3029490616621986e-05, - "loss": 0.4916, - "step": 570 - }, - { - "epoch": 1.51, - "learning_rate": 4.301608579088472e-05, - "loss": 0.2001, - "step": 571 - }, - { - "epoch": 1.51, - "learning_rate": 4.3002680965147456e-05, - "loss": 0.3897, - "step": 572 - }, - { - "epoch": 1.52, - "learning_rate": 4.298927613941019e-05, - "loss": 0.1723, - "step": 573 - }, - { - "epoch": 1.52, - "learning_rate": 4.297587131367292e-05, - "loss": 0.4095, - "step": 574 - }, - { - "epoch": 1.52, - "learning_rate": 4.296246648793566e-05, - "loss": 0.2615, - "step": 575 - }, - { - "epoch": 1.52, - "learning_rate": 4.294906166219839e-05, - "loss": 0.221, - "step": 576 - }, - { - "epoch": 1.53, - "learning_rate": 4.293565683646113e-05, - "loss": 0.1373, - "step": 577 - }, - { - "epoch": 1.53, - "learning_rate": 4.292225201072386e-05, - "loss": 0.313, - "step": 578 - }, - { - "epoch": 1.53, - "learning_rate": 4.2908847184986595e-05, - "loss": 0.4608, - "step": 579 - }, - { - "epoch": 1.53, - "learning_rate": 4.289544235924933e-05, - "loss": 0.2894, - "step": 580 - }, - { - "epoch": 1.54, - "learning_rate": 4.2882037533512065e-05, - "loss": 0.5123, - "step": 581 - }, - { - "epoch": 1.54, - "learning_rate": 4.28686327077748e-05, - "loss": 0.3287, - "step": 582 - }, - { - "epoch": 1.54, - "learning_rate": 4.2855227882037536e-05, - "loss": 0.4634, - "step": 583 - }, - { - "epoch": 1.54, - "learning_rate": 4.284182305630027e-05, - "loss": 0.0753, - "step": 584 - }, - { - "epoch": 1.55, - "learning_rate": 4.2828418230563006e-05, - "loss": 0.5287, - "step": 585 - }, - { - "epoch": 1.55, - "learning_rate": 4.281501340482574e-05, - "loss": 0.5483, - "step": 586 - }, - { - "epoch": 1.55, - "learning_rate": 4.2801608579088476e-05, - "loss": 0.1076, - "step": 587 - }, - { - "epoch": 1.56, - "learning_rate": 4.278820375335121e-05, - "loss": 0.232, - "step": 588 - }, - { - "epoch": 1.56, - "learning_rate": 4.277479892761394e-05, - "loss": 0.4706, - "step": 589 - }, - { - "epoch": 1.56, - "learning_rate": 4.276139410187668e-05, - "loss": 0.5791, - "step": 590 - }, - { - "epoch": 1.56, - "learning_rate": 4.274798927613941e-05, - "loss": 0.4995, - "step": 591 - }, - { - "epoch": 1.57, - "learning_rate": 4.273458445040215e-05, - "loss": 0.3419, - "step": 592 - }, - { - "epoch": 1.57, - "learning_rate": 4.272117962466488e-05, - "loss": 0.2728, - "step": 593 - }, - { - "epoch": 1.57, - "learning_rate": 4.2707774798927615e-05, - "loss": 0.3598, - "step": 594 - }, - { - "epoch": 1.57, - "learning_rate": 4.269436997319035e-05, - "loss": 0.2669, - "step": 595 - }, - { - "epoch": 1.58, - "learning_rate": 4.2680965147453086e-05, - "loss": 0.4091, - "step": 596 - }, - { - "epoch": 1.58, - "learning_rate": 4.266756032171582e-05, - "loss": 0.7653, - "step": 597 - }, - { - "epoch": 1.58, - "learning_rate": 4.2654155495978556e-05, - "loss": 0.4323, - "step": 598 - }, - { - "epoch": 1.58, - "learning_rate": 4.2640750670241284e-05, - "loss": 0.2147, - "step": 599 - }, - { - "epoch": 1.59, - "learning_rate": 4.2627345844504026e-05, - "loss": 0.2229, - "step": 600 - }, - { - "epoch": 1.59, - "learning_rate": 4.2613941018766754e-05, - "loss": 0.1477, - "step": 601 - }, - { - "epoch": 1.59, - "learning_rate": 4.2600536193029496e-05, - "loss": 0.4813, - "step": 602 - }, - { - "epoch": 1.6, - "learning_rate": 4.2587131367292225e-05, - "loss": 0.2719, - "step": 603 - }, - { - "epoch": 1.6, - "learning_rate": 4.257372654155496e-05, - "loss": 0.1577, - "step": 604 - }, - { - "epoch": 1.6, - "learning_rate": 4.2560321715817695e-05, - "loss": 0.5718, - "step": 605 - }, - { - "epoch": 1.6, - "learning_rate": 4.254691689008043e-05, - "loss": 0.1481, - "step": 606 - }, - { - "epoch": 1.61, - "learning_rate": 4.2533512064343165e-05, - "loss": 0.3548, - "step": 607 - }, - { - "epoch": 1.61, - "learning_rate": 4.25201072386059e-05, - "loss": 0.1907, - "step": 608 - }, - { - "epoch": 1.61, - "learning_rate": 4.250670241286863e-05, - "loss": 0.4215, - "step": 609 - }, - { - "epoch": 1.61, - "learning_rate": 4.249329758713137e-05, - "loss": 0.0681, - "step": 610 - }, - { - "epoch": 1.62, - "learning_rate": 4.24798927613941e-05, - "loss": 0.514, - "step": 611 - }, - { - "epoch": 1.62, - "learning_rate": 4.246648793565684e-05, - "loss": 0.1242, - "step": 612 - }, - { - "epoch": 1.62, - "learning_rate": 4.245308310991957e-05, - "loss": 0.217, - "step": 613 - }, - { - "epoch": 1.62, - "learning_rate": 4.243967828418231e-05, - "loss": 0.1641, - "step": 614 - }, - { - "epoch": 1.63, - "learning_rate": 4.242627345844504e-05, - "loss": 0.7095, - "step": 615 - }, - { - "epoch": 1.63, - "learning_rate": 4.2412868632707775e-05, - "loss": 0.359, - "step": 616 - }, - { - "epoch": 1.63, - "learning_rate": 4.239946380697051e-05, - "loss": 0.94, - "step": 617 - }, - { - "epoch": 1.63, - "learning_rate": 4.2386058981233245e-05, - "loss": 0.5194, - "step": 618 - }, - { - "epoch": 1.64, - "learning_rate": 4.237265415549598e-05, - "loss": 0.3398, - "step": 619 - }, - { - "epoch": 1.64, - "learning_rate": 4.2359249329758715e-05, - "loss": 0.2305, - "step": 620 - }, - { - "epoch": 1.64, - "learning_rate": 4.234584450402145e-05, - "loss": 0.6045, - "step": 621 - }, - { - "epoch": 1.65, - "learning_rate": 4.2332439678284185e-05, - "loss": 0.3196, - "step": 622 - }, - { - "epoch": 1.65, - "learning_rate": 4.231903485254692e-05, - "loss": 0.0753, - "step": 623 - }, - { - "epoch": 1.65, - "learning_rate": 4.2305630026809656e-05, - "loss": 0.2732, - "step": 624 - }, - { - "epoch": 1.65, - "learning_rate": 4.229222520107239e-05, - "loss": 0.4372, - "step": 625 - }, - { - "epoch": 1.66, - "learning_rate": 4.227882037533512e-05, - "loss": 0.1299, - "step": 626 - }, - { - "epoch": 1.66, - "learning_rate": 4.226541554959786e-05, - "loss": 0.2928, - "step": 627 - }, - { - "epoch": 1.66, - "learning_rate": 4.225201072386059e-05, - "loss": 0.2028, - "step": 628 - }, - { - "epoch": 1.66, - "learning_rate": 4.223860589812333e-05, - "loss": 0.2725, - "step": 629 - }, - { - "epoch": 1.67, - "learning_rate": 4.222520107238606e-05, - "loss": 0.0851, - "step": 630 - }, - { - "epoch": 1.67, - "learning_rate": 4.2211796246648795e-05, - "loss": 0.1471, - "step": 631 - }, - { - "epoch": 1.67, - "learning_rate": 4.219839142091153e-05, - "loss": 0.1986, - "step": 632 - }, - { - "epoch": 1.67, - "learning_rate": 4.2184986595174265e-05, - "loss": 0.5156, - "step": 633 - }, - { - "epoch": 1.68, - "learning_rate": 4.2171581769437e-05, - "loss": 0.5286, - "step": 634 - }, - { - "epoch": 1.68, - "learning_rate": 4.2158176943699735e-05, - "loss": 0.3635, - "step": 635 - }, - { - "epoch": 1.68, - "learning_rate": 4.2144772117962464e-05, - "loss": 0.1407, - "step": 636 - }, - { - "epoch": 1.69, - "learning_rate": 4.2131367292225206e-05, - "loss": 0.1042, - "step": 637 - }, - { - "epoch": 1.69, - "learning_rate": 4.2117962466487934e-05, - "loss": 0.1553, - "step": 638 - }, - { - "epoch": 1.69, - "learning_rate": 4.2104557640750676e-05, - "loss": 0.1665, - "step": 639 - }, - { - "epoch": 1.69, - "learning_rate": 4.2091152815013404e-05, - "loss": 0.3706, - "step": 640 - }, - { - "epoch": 1.7, - "learning_rate": 4.207774798927614e-05, - "loss": 0.6195, - "step": 641 - }, - { - "epoch": 1.7, - "learning_rate": 4.2064343163538874e-05, - "loss": 0.1341, - "step": 642 - }, - { - "epoch": 1.7, - "learning_rate": 4.205093833780161e-05, - "loss": 0.5384, - "step": 643 - }, - { - "epoch": 1.7, - "learning_rate": 4.2037533512064345e-05, - "loss": 0.2802, - "step": 644 - }, - { - "epoch": 1.71, - "learning_rate": 4.202412868632708e-05, - "loss": 0.3812, - "step": 645 - }, - { - "epoch": 1.71, - "learning_rate": 4.2010723860589815e-05, - "loss": 0.2433, - "step": 646 - }, - { - "epoch": 1.71, - "learning_rate": 4.199731903485255e-05, - "loss": 0.0642, - "step": 647 - }, - { - "epoch": 1.71, - "learning_rate": 4.1983914209115285e-05, - "loss": 0.0547, - "step": 648 - }, - { - "epoch": 1.72, - "learning_rate": 4.197050938337802e-05, - "loss": 0.4388, - "step": 649 - }, - { - "epoch": 1.72, - "learning_rate": 4.1957104557640756e-05, - "loss": 0.8228, - "step": 650 - }, - { - "epoch": 1.72, - "learning_rate": 4.1943699731903484e-05, - "loss": 0.6453, - "step": 651 - }, - { - "epoch": 1.72, - "learning_rate": 4.1930294906166226e-05, - "loss": 0.3367, - "step": 652 - }, - { - "epoch": 1.73, - "learning_rate": 4.1916890080428954e-05, - "loss": 0.2139, - "step": 653 - }, - { - "epoch": 1.73, - "learning_rate": 4.1903485254691696e-05, - "loss": 0.2144, - "step": 654 - }, - { - "epoch": 1.73, - "learning_rate": 4.1890080428954424e-05, - "loss": 0.3894, - "step": 655 - }, - { - "epoch": 1.74, - "learning_rate": 4.187667560321716e-05, - "loss": 0.0891, - "step": 656 - }, - { - "epoch": 1.74, - "learning_rate": 4.1863270777479895e-05, - "loss": 0.1287, - "step": 657 - }, - { - "epoch": 1.74, - "learning_rate": 4.184986595174263e-05, - "loss": 0.2826, - "step": 658 - }, - { - "epoch": 1.74, - "learning_rate": 4.1836461126005365e-05, - "loss": 0.474, - "step": 659 - }, - { - "epoch": 1.75, - "learning_rate": 4.18230563002681e-05, - "loss": 0.4228, - "step": 660 - }, - { - "epoch": 1.75, - "learning_rate": 4.180965147453083e-05, - "loss": 0.4952, - "step": 661 - }, - { - "epoch": 1.75, - "learning_rate": 4.179624664879357e-05, - "loss": 0.173, - "step": 662 - }, - { - "epoch": 1.75, - "learning_rate": 4.17828418230563e-05, - "loss": 0.363, - "step": 663 - }, - { - "epoch": 1.76, - "learning_rate": 4.176943699731904e-05, - "loss": 0.4404, - "step": 664 - }, - { - "epoch": 1.76, - "learning_rate": 4.175603217158177e-05, - "loss": 0.486, - "step": 665 - }, - { - "epoch": 1.76, - "learning_rate": 4.1742627345844504e-05, - "loss": 0.4463, - "step": 666 - }, - { - "epoch": 1.76, - "learning_rate": 4.172922252010724e-05, - "loss": 0.2409, - "step": 667 - }, - { - "epoch": 1.77, - "learning_rate": 4.1715817694369974e-05, - "loss": 0.5291, - "step": 668 - }, - { - "epoch": 1.77, - "learning_rate": 4.170241286863271e-05, - "loss": 0.069, - "step": 669 - }, - { - "epoch": 1.77, - "learning_rate": 4.1689008042895445e-05, - "loss": 0.4162, - "step": 670 - }, - { - "epoch": 1.78, - "learning_rate": 4.167560321715818e-05, - "loss": 0.6171, - "step": 671 - }, - { - "epoch": 1.78, - "learning_rate": 4.1662198391420915e-05, - "loss": 0.3097, - "step": 672 - }, - { - "epoch": 1.78, - "learning_rate": 4.164879356568365e-05, - "loss": 0.5109, - "step": 673 - }, - { - "epoch": 1.78, - "learning_rate": 4.1635388739946385e-05, - "loss": 0.2169, - "step": 674 - }, - { - "epoch": 1.79, - "learning_rate": 4.162198391420912e-05, - "loss": 0.2406, - "step": 675 - }, - { - "epoch": 1.79, - "learning_rate": 4.160857908847185e-05, - "loss": 0.1853, - "step": 676 - }, - { - "epoch": 1.79, - "learning_rate": 4.159517426273459e-05, - "loss": 0.5743, - "step": 677 - }, - { - "epoch": 1.79, - "learning_rate": 4.158176943699732e-05, - "loss": 0.5432, - "step": 678 - }, - { - "epoch": 1.8, - "learning_rate": 4.156836461126006e-05, - "loss": 0.2033, - "step": 679 - }, - { - "epoch": 1.8, - "learning_rate": 4.155495978552279e-05, - "loss": 0.3848, - "step": 680 - }, - { - "epoch": 1.8, - "learning_rate": 4.1541554959785524e-05, - "loss": 0.1721, - "step": 681 - }, - { - "epoch": 1.8, - "learning_rate": 4.152815013404826e-05, - "loss": 0.3793, - "step": 682 - }, - { - "epoch": 1.81, - "learning_rate": 4.1514745308310994e-05, - "loss": 0.3848, - "step": 683 - }, - { - "epoch": 1.81, - "learning_rate": 4.150134048257373e-05, - "loss": 0.186, - "step": 684 - }, - { - "epoch": 1.81, - "learning_rate": 4.1487935656836465e-05, - "loss": 0.2692, - "step": 685 - }, - { - "epoch": 1.81, - "learning_rate": 4.147453083109919e-05, - "loss": 0.3839, - "step": 686 - }, - { - "epoch": 1.82, - "learning_rate": 4.1461126005361935e-05, - "loss": 0.3037, - "step": 687 - }, - { - "epoch": 1.82, - "learning_rate": 4.144772117962466e-05, - "loss": 0.6401, - "step": 688 - }, - { - "epoch": 1.82, - "learning_rate": 4.1434316353887405e-05, - "loss": 0.1173, - "step": 689 - }, - { - "epoch": 1.83, - "learning_rate": 4.1420911528150134e-05, - "loss": 0.3217, - "step": 690 - }, - { - "epoch": 1.83, - "learning_rate": 4.140750670241287e-05, - "loss": 0.2358, - "step": 691 - }, - { - "epoch": 1.83, - "learning_rate": 4.1394101876675604e-05, - "loss": 0.7696, - "step": 692 - }, - { - "epoch": 1.83, - "learning_rate": 4.138069705093834e-05, - "loss": 0.2288, - "step": 693 - }, - { - "epoch": 1.84, - "learning_rate": 4.1367292225201074e-05, - "loss": 0.2575, - "step": 694 - }, - { - "epoch": 1.84, - "learning_rate": 4.135388739946381e-05, - "loss": 0.1201, - "step": 695 - }, - { - "epoch": 1.84, - "learning_rate": 4.1340482573726544e-05, - "loss": 0.2034, - "step": 696 - }, - { - "epoch": 1.84, - "learning_rate": 4.132707774798928e-05, - "loss": 0.1142, - "step": 697 - }, - { - "epoch": 1.85, - "learning_rate": 4.1313672922252015e-05, - "loss": 0.5671, - "step": 698 - }, - { - "epoch": 1.85, - "learning_rate": 4.130026809651475e-05, - "loss": 0.3132, - "step": 699 - }, - { - "epoch": 1.85, - "learning_rate": 4.1286863270777485e-05, - "loss": 0.4266, - "step": 700 - }, - { - "epoch": 1.85, - "learning_rate": 4.127345844504021e-05, - "loss": 0.1354, - "step": 701 - }, - { - "epoch": 1.86, - "learning_rate": 4.1260053619302955e-05, - "loss": 0.2867, - "step": 702 - }, - { - "epoch": 1.86, - "learning_rate": 4.1246648793565684e-05, - "loss": 0.1839, - "step": 703 - }, - { - "epoch": 1.86, - "learning_rate": 4.1233243967828425e-05, - "loss": 0.4741, - "step": 704 - }, - { - "epoch": 1.87, - "learning_rate": 4.1219839142091154e-05, - "loss": 0.2909, - "step": 705 - }, - { - "epoch": 1.87, - "learning_rate": 4.120643431635389e-05, - "loss": 0.2705, - "step": 706 - }, - { - "epoch": 1.87, - "learning_rate": 4.1193029490616624e-05, - "loss": 0.1354, - "step": 707 - }, - { - "epoch": 1.87, - "learning_rate": 4.117962466487936e-05, - "loss": 0.4801, - "step": 708 - }, - { - "epoch": 1.88, - "learning_rate": 4.1166219839142094e-05, - "loss": 0.189, - "step": 709 - }, - { - "epoch": 1.88, - "learning_rate": 4.115281501340483e-05, - "loss": 0.3204, - "step": 710 - }, - { - "epoch": 1.88, - "learning_rate": 4.113941018766756e-05, - "loss": 0.4358, - "step": 711 - }, - { - "epoch": 1.88, - "learning_rate": 4.11260053619303e-05, - "loss": 0.9474, - "step": 712 - }, - { - "epoch": 1.89, - "learning_rate": 4.111260053619303e-05, - "loss": 0.2102, - "step": 713 - }, - { - "epoch": 1.89, - "learning_rate": 4.109919571045577e-05, - "loss": 0.3927, - "step": 714 - }, - { - "epoch": 1.89, - "learning_rate": 4.10857908847185e-05, - "loss": 0.139, - "step": 715 - }, - { - "epoch": 1.89, - "learning_rate": 4.1072386058981233e-05, - "loss": 0.3575, - "step": 716 - }, - { - "epoch": 1.9, - "learning_rate": 4.105898123324397e-05, - "loss": 0.7534, - "step": 717 - }, - { - "epoch": 1.9, - "learning_rate": 4.1045576407506704e-05, - "loss": 0.1134, - "step": 718 - }, - { - "epoch": 1.9, - "learning_rate": 4.103217158176944e-05, - "loss": 0.2136, - "step": 719 - }, - { - "epoch": 1.9, - "learning_rate": 4.1018766756032174e-05, - "loss": 0.4344, - "step": 720 - }, - { - "epoch": 1.91, - "learning_rate": 4.10053619302949e-05, - "loss": 0.0695, - "step": 721 - }, - { - "epoch": 1.91, - "learning_rate": 4.0991957104557644e-05, - "loss": 0.2286, - "step": 722 - }, - { - "epoch": 1.91, - "learning_rate": 4.097855227882037e-05, - "loss": 0.1189, - "step": 723 - }, - { - "epoch": 1.92, - "learning_rate": 4.0965147453083115e-05, - "loss": 0.2882, - "step": 724 - }, - { - "epoch": 1.92, - "learning_rate": 4.095174262734584e-05, - "loss": 0.2623, - "step": 725 - }, - { - "epoch": 1.92, - "learning_rate": 4.093833780160858e-05, - "loss": 0.2473, - "step": 726 - }, - { - "epoch": 1.92, - "learning_rate": 4.092493297587131e-05, - "loss": 0.4846, - "step": 727 - }, - { - "epoch": 1.93, - "learning_rate": 4.091152815013405e-05, - "loss": 0.1689, - "step": 728 - }, - { - "epoch": 1.93, - "learning_rate": 4.0898123324396783e-05, - "loss": 0.3481, - "step": 729 - }, - { - "epoch": 1.93, - "learning_rate": 4.088471849865952e-05, - "loss": 0.3447, - "step": 730 - }, - { - "epoch": 1.93, - "learning_rate": 4.0871313672922254e-05, - "loss": 0.2959, - "step": 731 - }, - { - "epoch": 1.94, - "learning_rate": 4.085790884718499e-05, - "loss": 0.3387, - "step": 732 - }, - { - "epoch": 1.94, - "learning_rate": 4.0844504021447724e-05, - "loss": 0.3742, - "step": 733 - }, - { - "epoch": 1.94, - "learning_rate": 4.083109919571046e-05, - "loss": 0.3245, - "step": 734 - }, - { - "epoch": 1.94, - "learning_rate": 4.0817694369973194e-05, - "loss": 0.4891, - "step": 735 - }, - { - "epoch": 1.95, - "learning_rate": 4.080428954423593e-05, - "loss": 0.1444, - "step": 736 - }, - { - "epoch": 1.95, - "learning_rate": 4.0790884718498664e-05, - "loss": 0.3678, - "step": 737 - }, - { - "epoch": 1.95, - "learning_rate": 4.077747989276139e-05, - "loss": 0.3772, - "step": 738 - }, - { - "epoch": 1.96, - "learning_rate": 4.0764075067024135e-05, - "loss": 0.43, - "step": 739 - }, - { - "epoch": 1.96, - "learning_rate": 4.075067024128686e-05, - "loss": 0.2463, - "step": 740 - }, - { - "epoch": 1.96, - "learning_rate": 4.0737265415549605e-05, - "loss": 0.2277, - "step": 741 - }, - { - "epoch": 1.96, - "learning_rate": 4.072386058981233e-05, - "loss": 0.2153, - "step": 742 - }, - { - "epoch": 1.97, - "learning_rate": 4.071045576407507e-05, - "loss": 0.1052, - "step": 743 - }, - { - "epoch": 1.97, - "learning_rate": 4.0697050938337804e-05, - "loss": 0.5657, - "step": 744 - }, - { - "epoch": 1.97, - "learning_rate": 4.068364611260054e-05, - "loss": 0.2664, - "step": 745 - }, - { - "epoch": 1.97, - "learning_rate": 4.0670241286863274e-05, - "loss": 0.1369, - "step": 746 - }, - { - "epoch": 1.98, - "learning_rate": 4.065683646112601e-05, - "loss": 0.4972, - "step": 747 - }, - { - "epoch": 1.98, - "learning_rate": 4.064343163538874e-05, - "loss": 0.4131, - "step": 748 - }, - { - "epoch": 1.98, - "learning_rate": 4.063002680965148e-05, - "loss": 0.3824, - "step": 749 - }, - { - "epoch": 1.98, - "learning_rate": 4.061662198391421e-05, - "loss": 0.323, - "step": 750 - }, - { - "epoch": 1.99, - "learning_rate": 4.060321715817695e-05, - "loss": 0.3698, - "step": 751 - }, - { - "epoch": 1.99, - "learning_rate": 4.058981233243968e-05, - "loss": 0.1708, - "step": 752 - }, - { - "epoch": 1.99, - "learning_rate": 4.057640750670241e-05, - "loss": 0.2941, - "step": 753 - }, - { - "epoch": 1.99, - "learning_rate": 4.056300268096515e-05, - "loss": 0.3224, - "step": 754 - }, - { - "epoch": 2.0, - "learning_rate": 4.054959785522788e-05, - "loss": 0.0851, - "step": 755 - }, - { - "epoch": 2.0, - "learning_rate": 4.053619302949062e-05, - "loss": 0.4694, - "step": 756 - }, - { - "epoch": 2.0, - "eval_f1": 0.7882736156351792, - "eval_loss": 0.4484867751598358, - "eval_runtime": 1.8734, - "eval_samples_per_second": 807.638, - "eval_steps_per_second": 50.711, - "step": 756 - }, - { - "epoch": 2.0, - "learning_rate": 4.0522788203753354e-05, - "loss": 0.4739, - "step": 757 - }, - { - "epoch": 2.01, - "learning_rate": 4.050938337801609e-05, - "loss": 0.4117, - "step": 758 - }, - { - "epoch": 2.01, - "learning_rate": 4.0495978552278824e-05, - "loss": 0.2169, - "step": 759 - }, - { - "epoch": 2.01, - "learning_rate": 4.048257372654156e-05, - "loss": 0.1848, - "step": 760 - }, - { - "epoch": 2.01, - "learning_rate": 4.0469168900804294e-05, - "loss": 0.5066, - "step": 761 - }, - { - "epoch": 2.02, - "learning_rate": 4.045576407506703e-05, - "loss": 0.1784, - "step": 762 - }, - { - "epoch": 2.02, - "learning_rate": 4.044235924932976e-05, - "loss": 0.3869, - "step": 763 - }, - { - "epoch": 2.02, - "learning_rate": 4.04289544235925e-05, - "loss": 0.1132, - "step": 764 - }, - { - "epoch": 2.02, - "learning_rate": 4.041554959785523e-05, - "loss": 0.2724, - "step": 765 - }, - { - "epoch": 2.03, - "learning_rate": 4.040214477211797e-05, - "loss": 0.0983, - "step": 766 - }, - { - "epoch": 2.03, - "learning_rate": 4.03887399463807e-05, - "loss": 0.1831, - "step": 767 - }, - { - "epoch": 2.03, - "learning_rate": 4.037533512064343e-05, - "loss": 0.1954, - "step": 768 - }, - { - "epoch": 2.03, - "learning_rate": 4.036193029490617e-05, - "loss": 0.4738, - "step": 769 - }, - { - "epoch": 2.04, - "learning_rate": 4.0348525469168903e-05, - "loss": 0.3375, - "step": 770 - }, - { - "epoch": 2.04, - "learning_rate": 4.033512064343164e-05, - "loss": 0.3991, - "step": 771 - }, - { - "epoch": 2.04, - "learning_rate": 4.0321715817694374e-05, - "loss": 0.0696, - "step": 772 - }, - { - "epoch": 2.04, - "learning_rate": 4.03083109919571e-05, - "loss": 0.0982, - "step": 773 - }, - { - "epoch": 2.05, - "learning_rate": 4.0294906166219844e-05, - "loss": 0.2236, - "step": 774 - }, - { - "epoch": 2.05, - "learning_rate": 4.028150134048257e-05, - "loss": 0.4225, - "step": 775 - }, - { - "epoch": 2.05, - "learning_rate": 4.0268096514745314e-05, - "loss": 0.0583, - "step": 776 - }, - { - "epoch": 2.06, - "learning_rate": 4.025469168900804e-05, - "loss": 0.1643, - "step": 777 - }, - { - "epoch": 2.06, - "learning_rate": 4.024128686327078e-05, - "loss": 0.0351, - "step": 778 - }, - { - "epoch": 2.06, - "learning_rate": 4.022788203753351e-05, - "loss": 0.4496, - "step": 779 - }, - { - "epoch": 2.06, - "learning_rate": 4.021447721179625e-05, - "loss": 0.0372, - "step": 780 - }, - { - "epoch": 2.07, - "learning_rate": 4.020107238605898e-05, - "loss": 0.4198, - "step": 781 - }, - { - "epoch": 2.07, - "learning_rate": 4.018766756032172e-05, - "loss": 0.1968, - "step": 782 - }, - { - "epoch": 2.07, - "learning_rate": 4.017426273458445e-05, - "loss": 0.515, - "step": 783 - }, - { - "epoch": 2.07, - "learning_rate": 4.016085790884719e-05, - "loss": 0.408, - "step": 784 - }, - { - "epoch": 2.08, - "learning_rate": 4.0147453083109924e-05, - "loss": 0.3693, - "step": 785 - }, - { - "epoch": 2.08, - "learning_rate": 4.013404825737266e-05, - "loss": 0.0561, - "step": 786 - }, - { - "epoch": 2.08, - "learning_rate": 4.0120643431635394e-05, - "loss": 0.4011, - "step": 787 - }, - { - "epoch": 2.08, - "learning_rate": 4.010723860589812e-05, - "loss": 0.0872, - "step": 788 - }, - { - "epoch": 2.09, - "learning_rate": 4.0093833780160864e-05, - "loss": 0.0768, - "step": 789 - }, - { - "epoch": 2.09, - "learning_rate": 4.008042895442359e-05, - "loss": 0.0184, - "step": 790 - }, - { - "epoch": 2.09, - "learning_rate": 4.0067024128686334e-05, - "loss": 0.3287, - "step": 791 - }, - { - "epoch": 2.1, - "learning_rate": 4.005361930294906e-05, - "loss": 0.0262, - "step": 792 - }, - { - "epoch": 2.1, - "learning_rate": 4.00402144772118e-05, - "loss": 0.0248, - "step": 793 - }, - { - "epoch": 2.1, - "learning_rate": 4.002680965147453e-05, - "loss": 0.3853, - "step": 794 - }, - { - "epoch": 2.1, - "learning_rate": 4.001340482573727e-05, - "loss": 0.3512, - "step": 795 - }, - { - "epoch": 2.11, - "learning_rate": 4e-05, - "loss": 0.4188, - "step": 796 - }, - { - "epoch": 2.11, - "learning_rate": 3.998659517426274e-05, - "loss": 0.1834, - "step": 797 - }, - { - "epoch": 2.11, - "learning_rate": 3.997319034852547e-05, - "loss": 0.2074, - "step": 798 - }, - { - "epoch": 2.11, - "learning_rate": 3.995978552278821e-05, - "loss": 0.7317, - "step": 799 - }, - { - "epoch": 2.12, - "learning_rate": 3.994638069705094e-05, - "loss": 0.3534, - "step": 800 - }, - { - "epoch": 2.12, - "learning_rate": 3.993297587131368e-05, - "loss": 0.3184, - "step": 801 - }, - { - "epoch": 2.12, - "learning_rate": 3.991957104557641e-05, - "loss": 0.1088, - "step": 802 - }, - { - "epoch": 2.12, - "learning_rate": 3.990616621983914e-05, - "loss": 0.0429, - "step": 803 - }, - { - "epoch": 2.13, - "learning_rate": 3.989276139410188e-05, - "loss": 0.4518, - "step": 804 - }, - { - "epoch": 2.13, - "learning_rate": 3.987935656836461e-05, - "loss": 0.1746, - "step": 805 - }, - { - "epoch": 2.13, - "learning_rate": 3.986595174262735e-05, - "loss": 0.1881, - "step": 806 - }, - { - "epoch": 2.13, - "learning_rate": 3.985254691689008e-05, - "loss": 0.4111, - "step": 807 - }, - { - "epoch": 2.14, - "learning_rate": 3.983914209115281e-05, - "loss": 0.059, - "step": 808 - }, - { - "epoch": 2.14, - "learning_rate": 3.982573726541555e-05, - "loss": 0.0495, - "step": 809 - }, - { - "epoch": 2.14, - "learning_rate": 3.981233243967828e-05, - "loss": 0.1134, - "step": 810 - }, - { - "epoch": 2.15, - "learning_rate": 3.9798927613941023e-05, - "loss": 0.5469, - "step": 811 - }, - { - "epoch": 2.15, - "learning_rate": 3.978552278820375e-05, - "loss": 0.0581, - "step": 812 - }, - { - "epoch": 2.15, - "learning_rate": 3.977211796246649e-05, - "loss": 0.1254, - "step": 813 - }, - { - "epoch": 2.15, - "learning_rate": 3.975871313672922e-05, - "loss": 0.0679, - "step": 814 - }, - { - "epoch": 2.16, - "learning_rate": 3.974530831099196e-05, - "loss": 0.0463, - "step": 815 - }, - { - "epoch": 2.16, - "learning_rate": 3.973190348525469e-05, - "loss": 0.33, - "step": 816 - }, - { - "epoch": 2.16, - "learning_rate": 3.971849865951743e-05, - "loss": 0.2931, - "step": 817 - }, - { - "epoch": 2.16, - "learning_rate": 3.970509383378016e-05, - "loss": 0.1034, - "step": 818 - }, - { - "epoch": 2.17, - "learning_rate": 3.96916890080429e-05, - "loss": 0.0379, - "step": 819 - }, - { - "epoch": 2.17, - "learning_rate": 3.967828418230563e-05, - "loss": 0.0456, - "step": 820 - }, - { - "epoch": 2.17, - "learning_rate": 3.966487935656837e-05, - "loss": 0.4862, - "step": 821 - }, - { - "epoch": 2.17, - "learning_rate": 3.96514745308311e-05, - "loss": 0.0512, - "step": 822 - }, - { - "epoch": 2.18, - "learning_rate": 3.963806970509383e-05, - "loss": 0.0879, - "step": 823 - }, - { - "epoch": 2.18, - "learning_rate": 3.962466487935657e-05, - "loss": 0.3664, - "step": 824 - }, - { - "epoch": 2.18, - "learning_rate": 3.96112600536193e-05, - "loss": 0.0975, - "step": 825 - }, - { - "epoch": 2.19, - "learning_rate": 3.9597855227882044e-05, - "loss": 0.162, - "step": 826 - }, - { - "epoch": 2.19, - "learning_rate": 3.958445040214477e-05, - "loss": 0.1076, - "step": 827 - }, - { - "epoch": 2.19, - "learning_rate": 3.957104557640751e-05, - "loss": 0.1185, - "step": 828 - }, - { - "epoch": 2.19, - "learning_rate": 3.955764075067024e-05, - "loss": 0.0868, - "step": 829 - }, - { - "epoch": 2.2, - "learning_rate": 3.954423592493298e-05, - "loss": 0.0246, - "step": 830 - }, - { - "epoch": 2.2, - "learning_rate": 3.953083109919571e-05, - "loss": 0.2233, - "step": 831 - }, - { - "epoch": 2.2, - "learning_rate": 3.951742627345845e-05, - "loss": 0.2113, - "step": 832 - }, - { - "epoch": 2.2, - "learning_rate": 3.9504021447721176e-05, - "loss": 0.0587, - "step": 833 - }, - { - "epoch": 2.21, - "learning_rate": 3.949061662198392e-05, - "loss": 0.0521, - "step": 834 - }, - { - "epoch": 2.21, - "learning_rate": 3.9477211796246646e-05, - "loss": 0.3845, - "step": 835 - }, - { - "epoch": 2.21, - "learning_rate": 3.946380697050939e-05, - "loss": 0.1096, - "step": 836 - }, - { - "epoch": 2.21, - "learning_rate": 3.9450402144772117e-05, - "loss": 0.1488, - "step": 837 - }, - { - "epoch": 2.22, - "learning_rate": 3.943699731903485e-05, - "loss": 0.1937, - "step": 838 - }, - { - "epoch": 2.22, - "learning_rate": 3.942359249329759e-05, - "loss": 0.1309, - "step": 839 - }, - { - "epoch": 2.22, - "learning_rate": 3.941018766756032e-05, - "loss": 0.3271, - "step": 840 - }, - { - "epoch": 2.22, - "learning_rate": 3.939678284182306e-05, - "loss": 0.3318, - "step": 841 - }, - { - "epoch": 2.23, - "learning_rate": 3.938337801608579e-05, - "loss": 0.3516, - "step": 842 - }, - { - "epoch": 2.23, - "learning_rate": 3.936997319034853e-05, - "loss": 0.1641, - "step": 843 - }, - { - "epoch": 2.23, - "learning_rate": 3.935656836461126e-05, - "loss": 0.064, - "step": 844 - }, - { - "epoch": 2.24, - "learning_rate": 3.9343163538874e-05, - "loss": 0.1971, - "step": 845 - }, - { - "epoch": 2.24, - "learning_rate": 3.932975871313673e-05, - "loss": 0.1166, - "step": 846 - }, - { - "epoch": 2.24, - "learning_rate": 3.931635388739947e-05, - "loss": 0.0384, - "step": 847 - }, - { - "epoch": 2.24, - "learning_rate": 3.9302949061662196e-05, - "loss": 0.0462, - "step": 848 - }, - { - "epoch": 2.25, - "learning_rate": 3.928954423592494e-05, - "loss": 0.1073, - "step": 849 - }, - { - "epoch": 2.25, - "learning_rate": 3.9276139410187666e-05, - "loss": 0.0227, - "step": 850 - }, - { - "epoch": 2.25, - "learning_rate": 3.926273458445041e-05, - "loss": 0.0683, - "step": 851 - }, - { - "epoch": 2.25, - "learning_rate": 3.924932975871314e-05, - "loss": 0.7962, - "step": 852 - }, - { - "epoch": 2.26, - "learning_rate": 3.923592493297587e-05, - "loss": 0.0635, - "step": 853 - }, - { - "epoch": 2.26, - "learning_rate": 3.922252010723861e-05, - "loss": 0.0454, - "step": 854 - }, - { - "epoch": 2.26, - "learning_rate": 3.920911528150134e-05, - "loss": 0.5389, - "step": 855 - }, - { - "epoch": 2.26, - "learning_rate": 3.919571045576408e-05, - "loss": 0.1181, - "step": 856 - }, - { - "epoch": 2.27, - "learning_rate": 3.918230563002681e-05, - "loss": 0.1372, - "step": 857 - }, - { - "epoch": 2.27, - "learning_rate": 3.916890080428954e-05, - "loss": 0.3192, - "step": 858 - }, - { - "epoch": 2.27, - "learning_rate": 3.915549597855228e-05, - "loss": 0.3419, - "step": 859 - }, - { - "epoch": 2.28, - "learning_rate": 3.914209115281501e-05, - "loss": 0.0114, - "step": 860 - }, - { - "epoch": 2.28, - "learning_rate": 3.912868632707775e-05, - "loss": 0.1905, - "step": 861 - }, - { - "epoch": 2.28, - "learning_rate": 3.911528150134048e-05, - "loss": 0.0218, - "step": 862 - }, - { - "epoch": 2.28, - "learning_rate": 3.910187667560322e-05, - "loss": 0.4374, - "step": 863 - }, - { - "epoch": 2.29, - "learning_rate": 3.908847184986595e-05, - "loss": 0.1844, - "step": 864 - }, - { - "epoch": 2.29, - "learning_rate": 3.907506702412869e-05, - "loss": 0.2427, - "step": 865 - }, - { - "epoch": 2.29, - "learning_rate": 3.906166219839142e-05, - "loss": 0.2749, - "step": 866 - }, - { - "epoch": 2.29, - "learning_rate": 3.904825737265416e-05, - "loss": 0.2089, - "step": 867 - }, - { - "epoch": 2.3, - "learning_rate": 3.903485254691689e-05, - "loss": 0.06, - "step": 868 - }, - { - "epoch": 2.3, - "learning_rate": 3.902144772117963e-05, - "loss": 0.1951, - "step": 869 - }, - { - "epoch": 2.3, - "learning_rate": 3.900804289544236e-05, - "loss": 0.0252, - "step": 870 - }, - { - "epoch": 2.3, - "learning_rate": 3.89946380697051e-05, - "loss": 0.0299, - "step": 871 - }, - { - "epoch": 2.31, - "learning_rate": 3.898123324396783e-05, - "loss": 0.0298, - "step": 872 - }, - { - "epoch": 2.31, - "learning_rate": 3.896782841823057e-05, - "loss": 0.5186, - "step": 873 - }, - { - "epoch": 2.31, - "learning_rate": 3.89544235924933e-05, - "loss": 0.2704, - "step": 874 - }, - { - "epoch": 2.31, - "learning_rate": 3.894101876675603e-05, - "loss": 0.2435, - "step": 875 - }, - { - "epoch": 2.32, - "learning_rate": 3.892761394101877e-05, - "loss": 0.039, - "step": 876 - }, - { - "epoch": 2.32, - "learning_rate": 3.89142091152815e-05, - "loss": 0.0275, - "step": 877 - }, - { - "epoch": 2.32, - "learning_rate": 3.890080428954424e-05, - "loss": 0.1164, - "step": 878 - }, - { - "epoch": 2.33, - "learning_rate": 3.888739946380697e-05, - "loss": 0.1551, - "step": 879 - }, - { - "epoch": 2.33, - "learning_rate": 3.887399463806971e-05, - "loss": 0.0215, - "step": 880 - }, - { - "epoch": 2.33, - "learning_rate": 3.886058981233244e-05, - "loss": 0.0379, - "step": 881 - }, - { - "epoch": 2.33, - "learning_rate": 3.884718498659518e-05, - "loss": 0.0553, - "step": 882 - }, - { - "epoch": 2.34, - "learning_rate": 3.883378016085791e-05, - "loss": 0.1073, - "step": 883 - }, - { - "epoch": 2.34, - "learning_rate": 3.882037533512065e-05, - "loss": 0.3525, - "step": 884 - }, - { - "epoch": 2.34, - "learning_rate": 3.8806970509383376e-05, - "loss": 0.2646, - "step": 885 - }, - { - "epoch": 2.34, - "learning_rate": 3.879356568364612e-05, - "loss": 0.5758, - "step": 886 - }, - { - "epoch": 2.35, - "learning_rate": 3.8780160857908846e-05, - "loss": 0.9312, - "step": 887 - }, - { - "epoch": 2.35, - "learning_rate": 3.876675603217159e-05, - "loss": 0.2748, - "step": 888 - }, - { - "epoch": 2.35, - "learning_rate": 3.8753351206434316e-05, - "loss": 0.33, - "step": 889 - }, - { - "epoch": 2.35, - "learning_rate": 3.873994638069705e-05, - "loss": 0.0312, - "step": 890 - }, - { - "epoch": 2.36, - "learning_rate": 3.8726541554959786e-05, - "loss": 0.0449, - "step": 891 - }, - { - "epoch": 2.36, - "learning_rate": 3.871313672922252e-05, - "loss": 0.1197, - "step": 892 - }, - { - "epoch": 2.36, - "learning_rate": 3.869973190348526e-05, - "loss": 0.0913, - "step": 893 - }, - { - "epoch": 2.37, - "learning_rate": 3.868632707774799e-05, - "loss": 0.0284, - "step": 894 - }, - { - "epoch": 2.37, - "learning_rate": 3.867292225201073e-05, - "loss": 0.3769, - "step": 895 - }, - { - "epoch": 2.37, - "learning_rate": 3.865951742627346e-05, - "loss": 0.0947, - "step": 896 - }, - { - "epoch": 2.37, - "learning_rate": 3.86461126005362e-05, - "loss": 0.4282, - "step": 897 - }, - { - "epoch": 2.38, - "learning_rate": 3.863270777479893e-05, - "loss": 0.0049, - "step": 898 - }, - { - "epoch": 2.38, - "learning_rate": 3.861930294906167e-05, - "loss": 0.3632, - "step": 899 - }, - { - "epoch": 2.38, - "learning_rate": 3.8605898123324396e-05, - "loss": 0.0421, - "step": 900 - }, - { - "epoch": 2.38, - "learning_rate": 3.859249329758714e-05, - "loss": 0.5793, - "step": 901 - }, - { - "epoch": 2.39, - "learning_rate": 3.8579088471849866e-05, - "loss": 0.1695, - "step": 902 - }, - { - "epoch": 2.39, - "learning_rate": 3.856568364611261e-05, - "loss": 0.3082, - "step": 903 - }, - { - "epoch": 2.39, - "learning_rate": 3.8552278820375336e-05, - "loss": 0.0151, - "step": 904 - }, - { - "epoch": 2.39, - "learning_rate": 3.853887399463807e-05, - "loss": 0.3463, - "step": 905 - }, - { - "epoch": 2.4, - "learning_rate": 3.852546916890081e-05, - "loss": 0.4573, - "step": 906 - }, - { - "epoch": 2.4, - "learning_rate": 3.851206434316354e-05, - "loss": 0.1281, - "step": 907 - }, - { - "epoch": 2.4, - "learning_rate": 3.849865951742628e-05, - "loss": 0.3168, - "step": 908 - }, - { - "epoch": 2.4, - "learning_rate": 3.848525469168901e-05, - "loss": 0.0331, - "step": 909 - }, - { - "epoch": 2.41, - "learning_rate": 3.847184986595174e-05, - "loss": 0.1825, - "step": 910 - }, - { - "epoch": 2.41, - "learning_rate": 3.845844504021448e-05, - "loss": 0.6238, - "step": 911 - }, - { - "epoch": 2.41, - "learning_rate": 3.844504021447721e-05, - "loss": 0.0663, - "step": 912 - }, - { - "epoch": 2.42, - "learning_rate": 3.843163538873995e-05, - "loss": 0.2827, - "step": 913 - }, - { - "epoch": 2.42, - "learning_rate": 3.841823056300268e-05, - "loss": 0.0133, - "step": 914 - }, - { - "epoch": 2.42, - "learning_rate": 3.8404825737265416e-05, - "loss": 0.0415, - "step": 915 - }, - { - "epoch": 2.42, - "learning_rate": 3.839142091152815e-05, - "loss": 0.2969, - "step": 916 - }, - { - "epoch": 2.43, - "learning_rate": 3.8378016085790886e-05, - "loss": 0.0145, - "step": 917 - }, - { - "epoch": 2.43, - "learning_rate": 3.836461126005362e-05, - "loss": 0.0223, - "step": 918 - }, - { - "epoch": 2.43, - "learning_rate": 3.8351206434316357e-05, - "loss": 1.042, - "step": 919 - }, - { - "epoch": 2.43, - "learning_rate": 3.8337801608579085e-05, - "loss": 0.5061, - "step": 920 - }, - { - "epoch": 2.44, - "learning_rate": 3.832439678284183e-05, - "loss": 0.406, - "step": 921 - }, - { - "epoch": 2.44, - "learning_rate": 3.8310991957104555e-05, - "loss": 0.1337, - "step": 922 - }, - { - "epoch": 2.44, - "learning_rate": 3.82975871313673e-05, - "loss": 0.2057, - "step": 923 - }, - { - "epoch": 2.44, - "learning_rate": 3.8284182305630025e-05, - "loss": 0.0987, - "step": 924 - }, - { - "epoch": 2.45, - "learning_rate": 3.827077747989276e-05, - "loss": 0.4611, - "step": 925 - }, - { - "epoch": 2.45, - "learning_rate": 3.8257372654155496e-05, - "loss": 0.0152, - "step": 926 - }, - { - "epoch": 2.45, - "learning_rate": 3.824396782841823e-05, - "loss": 0.4478, - "step": 927 - }, - { - "epoch": 2.46, - "learning_rate": 3.8230563002680966e-05, - "loss": 0.0579, - "step": 928 - }, - { - "epoch": 2.46, - "learning_rate": 3.82171581769437e-05, - "loss": 0.1506, - "step": 929 - }, - { - "epoch": 2.46, - "learning_rate": 3.8203753351206436e-05, - "loss": 0.5514, - "step": 930 - }, - { - "epoch": 2.46, - "learning_rate": 3.819034852546917e-05, - "loss": 0.1505, - "step": 931 - }, - { - "epoch": 2.47, - "learning_rate": 3.8176943699731906e-05, - "loss": 0.1413, - "step": 932 - }, - { - "epoch": 2.47, - "learning_rate": 3.816353887399464e-05, - "loss": 0.4758, - "step": 933 - }, - { - "epoch": 2.47, - "learning_rate": 3.815013404825738e-05, - "loss": 0.0863, - "step": 934 - }, - { - "epoch": 2.47, - "learning_rate": 3.8136729222520105e-05, - "loss": 0.3561, - "step": 935 - }, - { - "epoch": 2.48, - "learning_rate": 3.812332439678285e-05, - "loss": 0.7784, - "step": 936 - }, - { - "epoch": 2.48, - "learning_rate": 3.8109919571045575e-05, - "loss": 0.2243, - "step": 937 - }, - { - "epoch": 2.48, - "learning_rate": 3.809651474530832e-05, - "loss": 0.1013, - "step": 938 - }, - { - "epoch": 2.48, - "learning_rate": 3.8083109919571046e-05, - "loss": 0.132, - "step": 939 - }, - { - "epoch": 2.49, - "learning_rate": 3.806970509383378e-05, - "loss": 0.3971, - "step": 940 - }, - { - "epoch": 2.49, - "learning_rate": 3.8056300268096516e-05, - "loss": 0.0637, - "step": 941 - }, - { - "epoch": 2.49, - "learning_rate": 3.804289544235925e-05, - "loss": 0.5178, - "step": 942 - }, - { - "epoch": 2.49, - "learning_rate": 3.8029490616621986e-05, - "loss": 0.1982, - "step": 943 - }, - { - "epoch": 2.5, - "learning_rate": 3.801608579088472e-05, - "loss": 0.2225, - "step": 944 - }, - { - "epoch": 2.5, - "learning_rate": 3.800268096514745e-05, - "loss": 0.1425, - "step": 945 - }, - { - "epoch": 2.5, - "learning_rate": 3.798927613941019e-05, - "loss": 0.0621, - "step": 946 - }, - { - "epoch": 2.51, - "learning_rate": 3.797587131367292e-05, - "loss": 0.1556, - "step": 947 - }, - { - "epoch": 2.51, - "learning_rate": 3.796246648793566e-05, - "loss": 0.3805, - "step": 948 - }, - { - "epoch": 2.51, - "learning_rate": 3.794906166219839e-05, - "loss": 0.7049, - "step": 949 - }, - { - "epoch": 2.51, - "learning_rate": 3.7935656836461125e-05, - "loss": 0.1055, - "step": 950 - }, - { - "epoch": 2.52, - "learning_rate": 3.792225201072386e-05, - "loss": 0.0489, - "step": 951 - }, - { - "epoch": 2.52, - "learning_rate": 3.7908847184986596e-05, - "loss": 0.1881, - "step": 952 - }, - { - "epoch": 2.52, - "learning_rate": 3.789544235924933e-05, - "loss": 0.0202, - "step": 953 - }, - { - "epoch": 2.52, - "learning_rate": 3.7882037533512066e-05, - "loss": 0.1043, - "step": 954 - }, - { - "epoch": 2.53, - "learning_rate": 3.78686327077748e-05, - "loss": 0.2093, - "step": 955 - }, - { - "epoch": 2.53, - "learning_rate": 3.7855227882037536e-05, - "loss": 0.0395, - "step": 956 - }, - { - "epoch": 2.53, - "learning_rate": 3.784182305630027e-05, - "loss": 0.1459, - "step": 957 - }, - { - "epoch": 2.53, - "learning_rate": 3.7828418230563006e-05, - "loss": 0.0338, - "step": 958 - }, - { - "epoch": 2.54, - "learning_rate": 3.781501340482574e-05, - "loss": 0.4741, - "step": 959 - }, - { - "epoch": 2.54, - "learning_rate": 3.780160857908847e-05, - "loss": 0.2049, - "step": 960 - }, - { - "epoch": 2.54, - "learning_rate": 3.778820375335121e-05, - "loss": 0.309, - "step": 961 - }, - { - "epoch": 2.54, - "learning_rate": 3.777479892761394e-05, - "loss": 0.0253, - "step": 962 - }, - { - "epoch": 2.55, - "learning_rate": 3.776139410187668e-05, - "loss": 0.4832, - "step": 963 - }, - { - "epoch": 2.55, - "learning_rate": 3.774798927613941e-05, - "loss": 0.2111, - "step": 964 - }, - { - "epoch": 2.55, - "learning_rate": 3.7734584450402145e-05, - "loss": 0.1788, - "step": 965 - }, - { - "epoch": 2.56, - "learning_rate": 3.772117962466488e-05, - "loss": 0.5252, - "step": 966 - }, - { - "epoch": 2.56, - "learning_rate": 3.7707774798927616e-05, - "loss": 0.4711, - "step": 967 - }, - { - "epoch": 2.56, - "learning_rate": 3.769436997319035e-05, - "loss": 0.5184, - "step": 968 - }, - { - "epoch": 2.56, - "learning_rate": 3.7680965147453086e-05, - "loss": 0.2164, - "step": 969 - }, - { - "epoch": 2.57, - "learning_rate": 3.7667560321715814e-05, - "loss": 0.5393, - "step": 970 - }, - { - "epoch": 2.57, - "learning_rate": 3.7654155495978556e-05, - "loss": 0.2588, - "step": 971 - }, - { - "epoch": 2.57, - "learning_rate": 3.7640750670241285e-05, - "loss": 0.164, - "step": 972 - }, - { - "epoch": 2.57, - "learning_rate": 3.7627345844504027e-05, - "loss": 0.2896, - "step": 973 - }, - { - "epoch": 2.58, - "learning_rate": 3.7613941018766755e-05, - "loss": 0.039, - "step": 974 - }, - { - "epoch": 2.58, - "learning_rate": 3.760053619302949e-05, - "loss": 0.16, - "step": 975 - }, - { - "epoch": 2.58, - "learning_rate": 3.7587131367292225e-05, - "loss": 0.1832, - "step": 976 - }, - { - "epoch": 2.58, - "learning_rate": 3.757372654155496e-05, - "loss": 0.0812, - "step": 977 - }, - { - "epoch": 2.59, - "learning_rate": 3.7560321715817695e-05, - "loss": 0.1476, - "step": 978 - }, - { - "epoch": 2.59, - "learning_rate": 3.754691689008043e-05, - "loss": 0.1853, - "step": 979 - }, - { - "epoch": 2.59, - "learning_rate": 3.7533512064343166e-05, - "loss": 0.2875, - "step": 980 - }, - { - "epoch": 2.6, - "learning_rate": 3.75201072386059e-05, - "loss": 0.1918, - "step": 981 - }, - { - "epoch": 2.6, - "learning_rate": 3.7506702412868636e-05, - "loss": 0.2445, - "step": 982 - }, - { - "epoch": 2.6, - "learning_rate": 3.749329758713137e-05, - "loss": 0.4653, - "step": 983 - }, - { - "epoch": 2.6, - "learning_rate": 3.7479892761394106e-05, - "loss": 0.0614, - "step": 984 - }, - { - "epoch": 2.61, - "learning_rate": 3.746648793565684e-05, - "loss": 0.2818, - "step": 985 - }, - { - "epoch": 2.61, - "learning_rate": 3.7453083109919576e-05, - "loss": 0.1363, - "step": 986 - }, - { - "epoch": 2.61, - "learning_rate": 3.7439678284182305e-05, - "loss": 0.3244, - "step": 987 - }, - { - "epoch": 2.61, - "learning_rate": 3.742627345844505e-05, - "loss": 0.081, - "step": 988 - }, - { - "epoch": 2.62, - "learning_rate": 3.7412868632707775e-05, - "loss": 0.0488, - "step": 989 - }, - { - "epoch": 2.62, - "learning_rate": 3.739946380697052e-05, - "loss": 0.2057, - "step": 990 - }, - { - "epoch": 2.62, - "learning_rate": 3.7386058981233245e-05, - "loss": 0.2598, - "step": 991 - }, - { - "epoch": 2.62, - "learning_rate": 3.737265415549598e-05, - "loss": 0.1318, - "step": 992 - }, - { - "epoch": 2.63, - "learning_rate": 3.7359249329758716e-05, - "loss": 0.0482, - "step": 993 - }, - { - "epoch": 2.63, - "learning_rate": 3.734584450402145e-05, - "loss": 0.2586, - "step": 994 - }, - { - "epoch": 2.63, - "learning_rate": 3.7332439678284186e-05, - "loss": 0.2533, - "step": 995 - }, - { - "epoch": 2.63, - "learning_rate": 3.731903485254692e-05, - "loss": 0.5292, - "step": 996 - }, - { - "epoch": 2.64, - "learning_rate": 3.730563002680965e-05, - "loss": 0.1677, - "step": 997 - }, - { - "epoch": 2.64, - "learning_rate": 3.729222520107239e-05, - "loss": 0.1869, - "step": 998 - }, - { - "epoch": 2.64, - "learning_rate": 3.727882037533512e-05, - "loss": 0.0645, - "step": 999 - }, - { - "epoch": 2.65, - "learning_rate": 3.726541554959786e-05, - "loss": 0.4107, - "step": 1000 - }, - { - "epoch": 2.65, - "learning_rate": 3.725201072386059e-05, - "loss": 0.0484, - "step": 1001 - }, - { - "epoch": 2.65, - "learning_rate": 3.7238605898123325e-05, - "loss": 0.0813, - "step": 1002 - }, - { - "epoch": 2.65, - "learning_rate": 3.722520107238606e-05, - "loss": 0.2467, - "step": 1003 - }, - { - "epoch": 2.66, - "learning_rate": 3.7211796246648795e-05, - "loss": 0.0324, - "step": 1004 - }, - { - "epoch": 2.66, - "learning_rate": 3.719839142091153e-05, - "loss": 0.0536, - "step": 1005 - }, - { - "epoch": 2.66, - "learning_rate": 3.7184986595174266e-05, - "loss": 0.0399, - "step": 1006 - }, - { - "epoch": 2.66, - "learning_rate": 3.7171581769436994e-05, - "loss": 0.0257, - "step": 1007 - }, - { - "epoch": 2.67, - "learning_rate": 3.7158176943699736e-05, - "loss": 0.0407, - "step": 1008 - }, - { - "epoch": 2.67, - "learning_rate": 3.7144772117962464e-05, - "loss": 0.0173, - "step": 1009 - }, - { - "epoch": 2.67, - "learning_rate": 3.7131367292225206e-05, - "loss": 0.0166, - "step": 1010 - }, - { - "epoch": 2.67, - "learning_rate": 3.7117962466487934e-05, - "loss": 0.1898, - "step": 1011 - }, - { - "epoch": 2.68, - "learning_rate": 3.710455764075067e-05, - "loss": 0.0525, - "step": 1012 - }, - { - "epoch": 2.68, - "learning_rate": 3.7091152815013405e-05, - "loss": 0.043, - "step": 1013 - }, - { - "epoch": 2.68, - "learning_rate": 3.707774798927614e-05, - "loss": 0.3994, - "step": 1014 - }, - { - "epoch": 2.69, - "learning_rate": 3.7064343163538875e-05, - "loss": 0.0372, - "step": 1015 - }, - { - "epoch": 2.69, - "learning_rate": 3.705093833780161e-05, - "loss": 0.2909, - "step": 1016 - }, - { - "epoch": 2.69, - "learning_rate": 3.7037533512064345e-05, - "loss": 0.8221, - "step": 1017 - }, - { - "epoch": 2.69, - "learning_rate": 3.702412868632708e-05, - "loss": 0.0084, - "step": 1018 - }, - { - "epoch": 2.7, - "learning_rate": 3.7010723860589815e-05, - "loss": 0.3058, - "step": 1019 - }, - { - "epoch": 2.7, - "learning_rate": 3.699731903485255e-05, - "loss": 1.0774, - "step": 1020 - }, - { - "epoch": 2.7, - "learning_rate": 3.6983914209115286e-05, - "loss": 0.2018, - "step": 1021 - }, - { - "epoch": 2.7, - "learning_rate": 3.6970509383378014e-05, - "loss": 0.0537, - "step": 1022 - }, - { - "epoch": 2.71, - "learning_rate": 3.6957104557640756e-05, - "loss": 0.444, - "step": 1023 - }, - { - "epoch": 2.71, - "learning_rate": 3.6943699731903484e-05, - "loss": 0.6497, - "step": 1024 - }, - { - "epoch": 2.71, - "learning_rate": 3.6930294906166226e-05, - "loss": 0.0097, - "step": 1025 - }, - { - "epoch": 2.71, - "learning_rate": 3.6916890080428955e-05, - "loss": 0.0082, - "step": 1026 - }, - { - "epoch": 2.72, - "learning_rate": 3.690348525469169e-05, - "loss": 0.0387, - "step": 1027 - }, - { - "epoch": 2.72, - "learning_rate": 3.6890080428954425e-05, - "loss": 0.3969, - "step": 1028 - }, - { - "epoch": 2.72, - "learning_rate": 3.687667560321716e-05, - "loss": 0.0136, - "step": 1029 - }, - { - "epoch": 2.72, - "learning_rate": 3.6863270777479895e-05, - "loss": 0.0099, - "step": 1030 - }, - { - "epoch": 2.73, - "learning_rate": 3.684986595174263e-05, - "loss": 0.3509, - "step": 1031 - }, - { - "epoch": 2.73, - "learning_rate": 3.683646112600536e-05, - "loss": 0.0257, - "step": 1032 - }, - { - "epoch": 2.73, - "learning_rate": 3.68230563002681e-05, - "loss": 0.3158, - "step": 1033 - }, - { - "epoch": 2.74, - "learning_rate": 3.680965147453083e-05, - "loss": 0.0677, - "step": 1034 - }, - { - "epoch": 2.74, - "learning_rate": 3.679624664879357e-05, - "loss": 0.3887, - "step": 1035 - }, - { - "epoch": 2.74, - "learning_rate": 3.67828418230563e-05, - "loss": 0.0714, - "step": 1036 - }, - { - "epoch": 2.74, - "learning_rate": 3.6769436997319034e-05, - "loss": 0.1066, - "step": 1037 - }, - { - "epoch": 2.75, - "learning_rate": 3.675603217158177e-05, - "loss": 0.6238, - "step": 1038 - }, - { - "epoch": 2.75, - "learning_rate": 3.6742627345844504e-05, - "loss": 0.0405, - "step": 1039 - }, - { - "epoch": 2.75, - "learning_rate": 3.672922252010724e-05, - "loss": 0.0223, - "step": 1040 - }, - { - "epoch": 2.75, - "learning_rate": 3.6715817694369975e-05, - "loss": 0.2737, - "step": 1041 - }, - { - "epoch": 2.76, - "learning_rate": 3.670241286863271e-05, - "loss": 0.015, - "step": 1042 - }, - { - "epoch": 2.76, - "learning_rate": 3.6689008042895445e-05, - "loss": 0.1709, - "step": 1043 - }, - { - "epoch": 2.76, - "learning_rate": 3.667560321715818e-05, - "loss": 0.2649, - "step": 1044 - }, - { - "epoch": 2.76, - "learning_rate": 3.6662198391420915e-05, - "loss": 0.1524, - "step": 1045 - }, - { - "epoch": 2.77, - "learning_rate": 3.664879356568365e-05, - "loss": 0.2461, - "step": 1046 - }, - { - "epoch": 2.77, - "learning_rate": 3.663538873994638e-05, - "loss": 0.3425, - "step": 1047 - }, - { - "epoch": 2.77, - "learning_rate": 3.662198391420912e-05, - "loss": 0.2689, - "step": 1048 - }, - { - "epoch": 2.78, - "learning_rate": 3.660857908847185e-05, - "loss": 0.0066, - "step": 1049 - }, - { - "epoch": 2.78, - "learning_rate": 3.659517426273459e-05, - "loss": 0.0328, - "step": 1050 - }, - { - "epoch": 2.78, - "learning_rate": 3.658176943699732e-05, - "loss": 0.1273, - "step": 1051 - }, - { - "epoch": 2.78, - "learning_rate": 3.6568364611260054e-05, - "loss": 0.2346, - "step": 1052 - }, - { - "epoch": 2.79, - "learning_rate": 3.655495978552279e-05, - "loss": 0.0118, - "step": 1053 - }, - { - "epoch": 2.79, - "learning_rate": 3.6541554959785525e-05, - "loss": 0.0287, - "step": 1054 - }, - { - "epoch": 2.79, - "learning_rate": 3.652815013404826e-05, - "loss": 0.264, - "step": 1055 - }, - { - "epoch": 2.79, - "learning_rate": 3.6514745308310995e-05, - "loss": 0.0216, - "step": 1056 - }, - { - "epoch": 2.8, - "learning_rate": 3.650134048257372e-05, - "loss": 0.0261, - "step": 1057 - }, - { - "epoch": 2.8, - "learning_rate": 3.6487935656836465e-05, - "loss": 0.1911, - "step": 1058 - }, - { - "epoch": 2.8, - "learning_rate": 3.6474530831099194e-05, - "loss": 0.029, - "step": 1059 - }, - { - "epoch": 2.8, - "learning_rate": 3.6461126005361935e-05, - "loss": 0.0393, - "step": 1060 - }, - { - "epoch": 2.81, - "learning_rate": 3.6447721179624664e-05, - "loss": 0.1044, - "step": 1061 - }, - { - "epoch": 2.81, - "learning_rate": 3.64343163538874e-05, - "loss": 0.6364, - "step": 1062 - }, - { - "epoch": 2.81, - "learning_rate": 3.6420911528150134e-05, - "loss": 0.3589, - "step": 1063 - }, - { - "epoch": 2.81, - "learning_rate": 3.640750670241287e-05, - "loss": 0.1128, - "step": 1064 - }, - { - "epoch": 2.82, - "learning_rate": 3.6394101876675604e-05, - "loss": 0.2219, - "step": 1065 - }, - { - "epoch": 2.82, - "learning_rate": 3.638069705093834e-05, - "loss": 0.211, - "step": 1066 - }, - { - "epoch": 2.82, - "learning_rate": 3.6367292225201075e-05, - "loss": 0.387, - "step": 1067 - }, - { - "epoch": 2.83, - "learning_rate": 3.635388739946381e-05, - "loss": 0.0725, - "step": 1068 - }, - { - "epoch": 2.83, - "learning_rate": 3.6340482573726545e-05, - "loss": 0.0268, - "step": 1069 - }, - { - "epoch": 2.83, - "learning_rate": 3.632707774798928e-05, - "loss": 0.516, - "step": 1070 - }, - { - "epoch": 2.83, - "learning_rate": 3.6313672922252015e-05, - "loss": 0.0746, - "step": 1071 - }, - { - "epoch": 2.84, - "learning_rate": 3.6300268096514743e-05, - "loss": 0.2486, - "step": 1072 - }, - { - "epoch": 2.84, - "learning_rate": 3.6286863270777485e-05, - "loss": 0.1584, - "step": 1073 - }, - { - "epoch": 2.84, - "learning_rate": 3.6273458445040214e-05, - "loss": 0.1301, - "step": 1074 - }, - { - "epoch": 2.84, - "learning_rate": 3.6260053619302956e-05, - "loss": 0.0122, - "step": 1075 - }, - { - "epoch": 2.85, - "learning_rate": 3.6246648793565684e-05, - "loss": 0.0215, - "step": 1076 - }, - { - "epoch": 2.85, - "learning_rate": 3.623324396782842e-05, - "loss": 0.2068, - "step": 1077 - }, - { - "epoch": 2.85, - "learning_rate": 3.6219839142091154e-05, - "loss": 0.1882, - "step": 1078 - }, - { - "epoch": 2.85, - "learning_rate": 3.620643431635389e-05, - "loss": 0.368, - "step": 1079 - }, - { - "epoch": 2.86, - "learning_rate": 3.6193029490616625e-05, - "loss": 0.7537, - "step": 1080 - }, - { - "epoch": 2.86, - "learning_rate": 3.617962466487936e-05, - "loss": 0.1614, - "step": 1081 - }, - { - "epoch": 2.86, - "learning_rate": 3.616621983914209e-05, - "loss": 0.0585, - "step": 1082 - }, - { - "epoch": 2.87, - "learning_rate": 3.615281501340483e-05, - "loss": 0.0535, - "step": 1083 - }, - { - "epoch": 2.87, - "learning_rate": 3.613941018766756e-05, - "loss": 0.6518, - "step": 1084 - }, - { - "epoch": 2.87, - "learning_rate": 3.61260053619303e-05, - "loss": 0.4885, - "step": 1085 - }, - { - "epoch": 2.87, - "learning_rate": 3.611260053619303e-05, - "loss": 0.4078, - "step": 1086 - }, - { - "epoch": 2.88, - "learning_rate": 3.6099195710455764e-05, - "loss": 0.2101, - "step": 1087 - }, - { - "epoch": 2.88, - "learning_rate": 3.60857908847185e-05, - "loss": 0.0192, - "step": 1088 - }, - { - "epoch": 2.88, - "learning_rate": 3.6072386058981234e-05, - "loss": 0.3885, - "step": 1089 - }, - { - "epoch": 2.88, - "learning_rate": 3.605898123324397e-05, - "loss": 0.0393, - "step": 1090 - }, - { - "epoch": 2.89, - "learning_rate": 3.6045576407506704e-05, - "loss": 0.2179, - "step": 1091 - }, - { - "epoch": 2.89, - "learning_rate": 3.603217158176944e-05, - "loss": 0.1814, - "step": 1092 - }, - { - "epoch": 2.89, - "learning_rate": 3.6018766756032174e-05, - "loss": 0.0647, - "step": 1093 - }, - { - "epoch": 2.89, - "learning_rate": 3.600536193029491e-05, - "loss": 0.0657, - "step": 1094 - }, - { - "epoch": 2.9, - "learning_rate": 3.5991957104557645e-05, - "loss": 0.1062, - "step": 1095 - }, - { - "epoch": 2.9, - "learning_rate": 3.597855227882038e-05, - "loss": 0.4314, - "step": 1096 - }, - { - "epoch": 2.9, - "learning_rate": 3.596514745308311e-05, - "loss": 0.3074, - "step": 1097 - }, - { - "epoch": 2.9, - "learning_rate": 3.595174262734585e-05, - "loss": 0.0159, - "step": 1098 - }, - { - "epoch": 2.91, - "learning_rate": 3.593833780160858e-05, - "loss": 0.3829, - "step": 1099 - }, - { - "epoch": 2.91, - "learning_rate": 3.592493297587132e-05, - "loss": 0.3277, - "step": 1100 - }, - { - "epoch": 2.91, - "learning_rate": 3.591152815013405e-05, - "loss": 0.3785, - "step": 1101 - }, - { - "epoch": 2.92, - "learning_rate": 3.5898123324396784e-05, - "loss": 0.0162, - "step": 1102 - }, - { - "epoch": 2.92, - "learning_rate": 3.588471849865952e-05, - "loss": 0.0772, - "step": 1103 - }, - { - "epoch": 2.92, - "learning_rate": 3.5871313672922254e-05, - "loss": 0.0292, - "step": 1104 - }, - { - "epoch": 2.92, - "learning_rate": 3.585790884718499e-05, - "loss": 0.0748, - "step": 1105 - }, - { - "epoch": 2.93, - "learning_rate": 3.5844504021447724e-05, - "loss": 0.2276, - "step": 1106 - }, - { - "epoch": 2.93, - "learning_rate": 3.583109919571046e-05, - "loss": 0.3174, - "step": 1107 - }, - { - "epoch": 2.93, - "learning_rate": 3.5817694369973195e-05, - "loss": 0.134, - "step": 1108 - }, - { - "epoch": 2.93, - "learning_rate": 3.580428954423592e-05, - "loss": 0.3488, - "step": 1109 - }, - { - "epoch": 2.94, - "learning_rate": 3.5790884718498665e-05, - "loss": 0.1003, - "step": 1110 - }, - { - "epoch": 2.94, - "learning_rate": 3.577747989276139e-05, - "loss": 0.1972, - "step": 1111 - }, - { - "epoch": 2.94, - "learning_rate": 3.5764075067024135e-05, - "loss": 0.2151, - "step": 1112 - }, - { - "epoch": 2.94, - "learning_rate": 3.5750670241286863e-05, - "loss": 0.3725, - "step": 1113 - }, - { - "epoch": 2.95, - "learning_rate": 3.57372654155496e-05, - "loss": 0.0227, - "step": 1114 - }, - { - "epoch": 2.95, - "learning_rate": 3.5723860589812334e-05, - "loss": 0.2063, - "step": 1115 - }, - { - "epoch": 2.95, - "learning_rate": 3.571045576407507e-05, - "loss": 0.0316, - "step": 1116 - }, - { - "epoch": 2.96, - "learning_rate": 3.5697050938337804e-05, - "loss": 0.1545, - "step": 1117 - }, - { - "epoch": 2.96, - "learning_rate": 3.568364611260054e-05, - "loss": 0.4582, - "step": 1118 - }, - { - "epoch": 2.96, - "learning_rate": 3.567024128686327e-05, - "loss": 0.5452, - "step": 1119 - }, - { - "epoch": 2.96, - "learning_rate": 3.565683646112601e-05, - "loss": 0.2009, - "step": 1120 - }, - { - "epoch": 2.97, - "learning_rate": 3.564343163538874e-05, - "loss": 0.1201, - "step": 1121 - }, - { - "epoch": 2.97, - "learning_rate": 3.563002680965148e-05, - "loss": 0.5343, - "step": 1122 - }, - { - "epoch": 2.97, - "learning_rate": 3.561662198391421e-05, - "loss": 0.3925, - "step": 1123 - }, - { - "epoch": 2.97, - "learning_rate": 3.560321715817694e-05, - "loss": 0.0375, - "step": 1124 - }, - { - "epoch": 2.98, - "learning_rate": 3.558981233243968e-05, - "loss": 0.0411, - "step": 1125 - }, - { - "epoch": 2.98, - "learning_rate": 3.5576407506702413e-05, - "loss": 0.0338, - "step": 1126 - }, - { - "epoch": 2.98, - "learning_rate": 3.556300268096515e-05, - "loss": 0.035, - "step": 1127 - }, - { - "epoch": 2.98, - "learning_rate": 3.5549597855227884e-05, - "loss": 0.0283, - "step": 1128 - }, - { - "epoch": 2.99, - "learning_rate": 3.553619302949062e-05, - "loss": 0.034, - "step": 1129 - }, - { - "epoch": 2.99, - "learning_rate": 3.5522788203753354e-05, - "loss": 0.518, - "step": 1130 - }, - { - "epoch": 2.99, - "learning_rate": 3.550938337801609e-05, - "loss": 0.0241, - "step": 1131 - }, - { - "epoch": 2.99, - "learning_rate": 3.5495978552278824e-05, - "loss": 0.3143, - "step": 1132 - }, - { - "epoch": 3.0, - "learning_rate": 3.548257372654156e-05, - "loss": 0.8011, - "step": 1133 - }, - { - "epoch": 3.0, - "learning_rate": 3.546916890080429e-05, - "loss": 0.4365, - "step": 1134 - }, - { - "epoch": 3.0, - "eval_f1": 0.7867219917012448, - "eval_loss": 0.613310694694519, - "eval_runtime": 1.9007, - "eval_samples_per_second": 796.041, - "eval_steps_per_second": 49.983, - "step": 1134 - }, - { - "epoch": 3.0, - "learning_rate": 3.545576407506703e-05, - "loss": 0.0111, - "step": 1135 - }, - { - "epoch": 3.01, - "learning_rate": 3.544235924932976e-05, - "loss": 0.0166, - "step": 1136 - }, - { - "epoch": 3.01, - "learning_rate": 3.54289544235925e-05, - "loss": 0.2159, - "step": 1137 - }, - { - "epoch": 3.01, - "learning_rate": 3.541554959785523e-05, - "loss": 0.0096, - "step": 1138 - }, - { - "epoch": 3.01, - "learning_rate": 3.540214477211796e-05, - "loss": 0.1352, - "step": 1139 - }, - { - "epoch": 3.02, - "learning_rate": 3.53887399463807e-05, - "loss": 0.0195, - "step": 1140 - }, - { - "epoch": 3.02, - "learning_rate": 3.5375335120643434e-05, - "loss": 0.1579, - "step": 1141 - }, - { - "epoch": 3.02, - "learning_rate": 3.536193029490617e-05, - "loss": 0.0078, - "step": 1142 - }, - { - "epoch": 3.02, - "learning_rate": 3.5348525469168904e-05, - "loss": 0.0111, - "step": 1143 - }, - { - "epoch": 3.03, - "learning_rate": 3.533512064343163e-05, - "loss": 0.2457, - "step": 1144 - }, - { - "epoch": 3.03, - "learning_rate": 3.5321715817694374e-05, - "loss": 0.014, - "step": 1145 - }, - { - "epoch": 3.03, - "learning_rate": 3.53083109919571e-05, - "loss": 0.2021, - "step": 1146 - }, - { - "epoch": 3.03, - "learning_rate": 3.5294906166219844e-05, - "loss": 0.5334, - "step": 1147 - }, - { - "epoch": 3.04, - "learning_rate": 3.528150134048257e-05, - "loss": 0.0116, - "step": 1148 - }, - { - "epoch": 3.04, - "learning_rate": 3.526809651474531e-05, - "loss": 0.0099, - "step": 1149 - }, - { - "epoch": 3.04, - "learning_rate": 3.525469168900804e-05, - "loss": 0.2102, - "step": 1150 - }, - { - "epoch": 3.04, - "learning_rate": 3.524128686327078e-05, - "loss": 0.0093, - "step": 1151 - }, - { - "epoch": 3.05, - "learning_rate": 3.522788203753351e-05, - "loss": 0.0112, - "step": 1152 - }, - { - "epoch": 3.05, - "learning_rate": 3.521447721179625e-05, - "loss": 0.1761, - "step": 1153 - }, - { - "epoch": 3.05, - "learning_rate": 3.5201072386058984e-05, - "loss": 0.1608, - "step": 1154 - }, - { - "epoch": 3.06, - "learning_rate": 3.518766756032172e-05, - "loss": 0.2883, - "step": 1155 - }, - { - "epoch": 3.06, - "learning_rate": 3.5174262734584454e-05, - "loss": 0.0304, - "step": 1156 - }, - { - "epoch": 3.06, - "learning_rate": 3.516085790884719e-05, - "loss": 0.0623, - "step": 1157 - }, - { - "epoch": 3.06, - "learning_rate": 3.5147453083109924e-05, - "loss": 0.1824, - "step": 1158 - }, - { - "epoch": 3.07, - "learning_rate": 3.513404825737265e-05, - "loss": 0.2527, - "step": 1159 - }, - { - "epoch": 3.07, - "learning_rate": 3.5120643431635394e-05, - "loss": 0.0877, - "step": 1160 - }, - { - "epoch": 3.07, - "learning_rate": 3.510723860589812e-05, - "loss": 0.2735, - "step": 1161 - }, - { - "epoch": 3.07, - "learning_rate": 3.5093833780160865e-05, - "loss": 0.1126, - "step": 1162 - }, - { - "epoch": 3.08, - "learning_rate": 3.508042895442359e-05, - "loss": 0.2498, - "step": 1163 - }, - { - "epoch": 3.08, - "learning_rate": 3.506702412868633e-05, - "loss": 0.022, - "step": 1164 - }, - { - "epoch": 3.08, - "learning_rate": 3.505361930294906e-05, - "loss": 0.2768, - "step": 1165 - }, - { - "epoch": 3.08, - "learning_rate": 3.50402144772118e-05, - "loss": 0.0429, - "step": 1166 - }, - { - "epoch": 3.09, - "learning_rate": 3.5026809651474533e-05, - "loss": 0.0198, - "step": 1167 - }, - { - "epoch": 3.09, - "learning_rate": 3.501340482573727e-05, - "loss": 0.0097, - "step": 1168 - }, - { - "epoch": 3.09, - "learning_rate": 3.5e-05, - "loss": 0.0276, - "step": 1169 - }, - { - "epoch": 3.1, - "learning_rate": 3.498659517426274e-05, - "loss": 0.2276, - "step": 1170 - }, - { - "epoch": 3.1, - "learning_rate": 3.497319034852547e-05, - "loss": 0.0461, - "step": 1171 - }, - { - "epoch": 3.1, - "learning_rate": 3.495978552278821e-05, - "loss": 0.0103, - "step": 1172 - }, - { - "epoch": 3.1, - "learning_rate": 3.494638069705094e-05, - "loss": 0.1455, - "step": 1173 - }, - { - "epoch": 3.11, - "learning_rate": 3.493297587131367e-05, - "loss": 0.0865, - "step": 1174 - }, - { - "epoch": 3.11, - "learning_rate": 3.491957104557641e-05, - "loss": 0.3226, - "step": 1175 - }, - { - "epoch": 3.11, - "learning_rate": 3.490616621983914e-05, - "loss": 0.1744, - "step": 1176 - }, - { - "epoch": 3.11, - "learning_rate": 3.489276139410188e-05, - "loss": 0.0148, - "step": 1177 - }, - { - "epoch": 3.12, - "learning_rate": 3.487935656836461e-05, - "loss": 0.2582, - "step": 1178 - }, - { - "epoch": 3.12, - "learning_rate": 3.486595174262735e-05, - "loss": 0.2782, - "step": 1179 - }, - { - "epoch": 3.12, - "learning_rate": 3.485254691689008e-05, - "loss": 0.143, - "step": 1180 - }, - { - "epoch": 3.12, - "learning_rate": 3.483914209115282e-05, - "loss": 0.0853, - "step": 1181 - }, - { - "epoch": 3.13, - "learning_rate": 3.4825737265415554e-05, - "loss": 0.1361, - "step": 1182 - }, - { - "epoch": 3.13, - "learning_rate": 3.481233243967829e-05, - "loss": 0.0883, - "step": 1183 - }, - { - "epoch": 3.13, - "learning_rate": 3.479892761394102e-05, - "loss": 0.0116, - "step": 1184 - }, - { - "epoch": 3.13, - "learning_rate": 3.478552278820376e-05, - "loss": 0.0531, - "step": 1185 - }, - { - "epoch": 3.14, - "learning_rate": 3.477211796246649e-05, - "loss": 0.0184, - "step": 1186 - }, - { - "epoch": 3.14, - "learning_rate": 3.475871313672923e-05, - "loss": 0.1601, - "step": 1187 - }, - { - "epoch": 3.14, - "learning_rate": 3.474530831099196e-05, - "loss": 0.007, - "step": 1188 - }, - { - "epoch": 3.15, - "learning_rate": 3.473190348525469e-05, - "loss": 0.0101, - "step": 1189 - }, - { - "epoch": 3.15, - "learning_rate": 3.471849865951743e-05, - "loss": 0.2385, - "step": 1190 - }, - { - "epoch": 3.15, - "learning_rate": 3.470509383378016e-05, - "loss": 0.0075, - "step": 1191 - }, - { - "epoch": 3.15, - "learning_rate": 3.46916890080429e-05, - "loss": 0.0919, - "step": 1192 - }, - { - "epoch": 3.16, - "learning_rate": 3.467828418230563e-05, - "loss": 0.0162, - "step": 1193 - }, - { - "epoch": 3.16, - "learning_rate": 3.466487935656836e-05, - "loss": 0.2239, - "step": 1194 - }, - { - "epoch": 3.16, - "learning_rate": 3.4651474530831104e-05, - "loss": 0.5757, - "step": 1195 - }, - { - "epoch": 3.16, - "learning_rate": 3.463806970509383e-05, - "loss": 0.0774, - "step": 1196 - }, - { - "epoch": 3.17, - "learning_rate": 3.4624664879356574e-05, - "loss": 0.2124, - "step": 1197 - }, - { - "epoch": 3.17, - "learning_rate": 3.46112600536193e-05, - "loss": 0.0107, - "step": 1198 - }, - { - "epoch": 3.17, - "learning_rate": 3.459785522788204e-05, - "loss": 0.3179, - "step": 1199 - }, - { - "epoch": 3.17, - "learning_rate": 3.458445040214477e-05, - "loss": 0.0138, - "step": 1200 - }, - { - "epoch": 3.18, - "learning_rate": 3.457104557640751e-05, - "loss": 0.0094, - "step": 1201 - }, - { - "epoch": 3.18, - "learning_rate": 3.455764075067024e-05, - "loss": 0.0039, - "step": 1202 - }, - { - "epoch": 3.18, - "learning_rate": 3.454423592493298e-05, - "loss": 0.0745, - "step": 1203 - }, - { - "epoch": 3.19, - "learning_rate": 3.453083109919571e-05, - "loss": 0.0387, - "step": 1204 - }, - { - "epoch": 3.19, - "learning_rate": 3.451742627345845e-05, - "loss": 0.1378, - "step": 1205 - }, - { - "epoch": 3.19, - "learning_rate": 3.450402144772118e-05, - "loss": 0.1299, - "step": 1206 - }, - { - "epoch": 3.19, - "learning_rate": 3.449061662198392e-05, - "loss": 0.2821, - "step": 1207 - }, - { - "epoch": 3.2, - "learning_rate": 3.4477211796246653e-05, - "loss": 0.2236, - "step": 1208 - }, - { - "epoch": 3.2, - "learning_rate": 3.446380697050938e-05, - "loss": 0.1436, - "step": 1209 - }, - { - "epoch": 3.2, - "learning_rate": 3.4450402144772124e-05, - "loss": 0.1504, - "step": 1210 - }, - { - "epoch": 3.2, - "learning_rate": 3.443699731903485e-05, - "loss": 0.0415, - "step": 1211 - }, - { - "epoch": 3.21, - "learning_rate": 3.4423592493297594e-05, - "loss": 0.023, - "step": 1212 - }, - { - "epoch": 3.21, - "learning_rate": 3.441018766756032e-05, - "loss": 0.2128, - "step": 1213 - }, - { - "epoch": 3.21, - "learning_rate": 3.439678284182306e-05, - "loss": 0.0066, - "step": 1214 - }, - { - "epoch": 3.21, - "learning_rate": 3.438337801608579e-05, - "loss": 0.4345, - "step": 1215 - }, - { - "epoch": 3.22, - "learning_rate": 3.436997319034853e-05, - "loss": 0.0214, - "step": 1216 - }, - { - "epoch": 3.22, - "learning_rate": 3.435656836461126e-05, - "loss": 0.2094, - "step": 1217 - }, - { - "epoch": 3.22, - "learning_rate": 3.4343163538874e-05, - "loss": 0.0822, - "step": 1218 - }, - { - "epoch": 3.22, - "learning_rate": 3.4329758713136726e-05, - "loss": 0.1153, - "step": 1219 - }, - { - "epoch": 3.23, - "learning_rate": 3.431635388739947e-05, - "loss": 0.0059, - "step": 1220 - }, - { - "epoch": 3.23, - "learning_rate": 3.43029490616622e-05, - "loss": 0.0069, - "step": 1221 - }, - { - "epoch": 3.23, - "learning_rate": 3.428954423592494e-05, - "loss": 0.044, - "step": 1222 - }, - { - "epoch": 3.24, - "learning_rate": 3.427613941018767e-05, - "loss": 0.1975, - "step": 1223 - }, - { - "epoch": 3.24, - "learning_rate": 3.42627345844504e-05, - "loss": 0.3294, - "step": 1224 - }, - { - "epoch": 3.24, - "learning_rate": 3.424932975871314e-05, - "loss": 0.026, - "step": 1225 - }, - { - "epoch": 3.24, - "learning_rate": 3.423592493297587e-05, - "loss": 0.2666, - "step": 1226 - }, - { - "epoch": 3.25, - "learning_rate": 3.422252010723861e-05, - "loss": 0.0628, - "step": 1227 - }, - { - "epoch": 3.25, - "learning_rate": 3.420911528150134e-05, - "loss": 0.0068, - "step": 1228 - }, - { - "epoch": 3.25, - "learning_rate": 3.419571045576407e-05, - "loss": 0.0144, - "step": 1229 - }, - { - "epoch": 3.25, - "learning_rate": 3.418230563002681e-05, - "loss": 0.0029, - "step": 1230 - }, - { - "epoch": 3.26, - "learning_rate": 3.416890080428954e-05, - "loss": 0.606, - "step": 1231 - }, - { - "epoch": 3.26, - "learning_rate": 3.415549597855228e-05, - "loss": 0.2162, - "step": 1232 - }, - { - "epoch": 3.26, - "learning_rate": 3.414209115281501e-05, - "loss": 0.146, - "step": 1233 - }, - { - "epoch": 3.26, - "learning_rate": 3.412868632707775e-05, - "loss": 0.3649, - "step": 1234 - }, - { - "epoch": 3.27, - "learning_rate": 3.411528150134048e-05, - "loss": 0.0062, - "step": 1235 - }, - { - "epoch": 3.27, - "learning_rate": 3.410187667560322e-05, - "loss": 0.4097, - "step": 1236 - }, - { - "epoch": 3.27, - "learning_rate": 3.408847184986595e-05, - "loss": 0.5354, - "step": 1237 - }, - { - "epoch": 3.28, - "learning_rate": 3.407506702412869e-05, - "loss": 0.6222, - "step": 1238 - }, - { - "epoch": 3.28, - "learning_rate": 3.406166219839142e-05, - "loss": 0.0023, - "step": 1239 - }, - { - "epoch": 3.28, - "learning_rate": 3.404825737265416e-05, - "loss": 0.0247, - "step": 1240 - }, - { - "epoch": 3.28, - "learning_rate": 3.403485254691689e-05, - "loss": 0.0051, - "step": 1241 - }, - { - "epoch": 3.29, - "learning_rate": 3.402144772117963e-05, - "loss": 0.2504, - "step": 1242 - }, - { - "epoch": 3.29, - "learning_rate": 3.400804289544236e-05, - "loss": 0.0195, - "step": 1243 - }, - { - "epoch": 3.29, - "learning_rate": 3.39946380697051e-05, - "loss": 0.3706, - "step": 1244 - }, - { - "epoch": 3.29, - "learning_rate": 3.398123324396783e-05, - "loss": 0.0174, - "step": 1245 - }, - { - "epoch": 3.3, - "learning_rate": 3.396782841823056e-05, - "loss": 0.0068, - "step": 1246 - }, - { - "epoch": 3.3, - "learning_rate": 3.39544235924933e-05, - "loss": 0.3938, - "step": 1247 - }, - { - "epoch": 3.3, - "learning_rate": 3.394101876675603e-05, - "loss": 0.0114, - "step": 1248 - }, - { - "epoch": 3.3, - "learning_rate": 3.3927613941018774e-05, - "loss": 0.0088, - "step": 1249 - }, - { - "epoch": 3.31, - "learning_rate": 3.39142091152815e-05, - "loss": 0.0126, - "step": 1250 - }, - { - "epoch": 3.31, - "learning_rate": 3.390080428954424e-05, - "loss": 0.0091, - "step": 1251 - }, - { - "epoch": 3.31, - "learning_rate": 3.388739946380697e-05, - "loss": 0.0232, - "step": 1252 - }, - { - "epoch": 3.31, - "learning_rate": 3.387399463806971e-05, - "loss": 0.3704, - "step": 1253 - }, - { - "epoch": 3.32, - "learning_rate": 3.386058981233244e-05, - "loss": 0.0112, - "step": 1254 - }, - { - "epoch": 3.32, - "learning_rate": 3.384718498659518e-05, - "loss": 0.1709, - "step": 1255 - }, - { - "epoch": 3.32, - "learning_rate": 3.3833780160857906e-05, - "loss": 0.0109, - "step": 1256 - }, - { - "epoch": 3.33, - "learning_rate": 3.382037533512065e-05, - "loss": 0.2874, - "step": 1257 - }, - { - "epoch": 3.33, - "learning_rate": 3.3806970509383376e-05, - "loss": 0.024, - "step": 1258 - }, - { - "epoch": 3.33, - "learning_rate": 3.379356568364612e-05, - "loss": 0.0131, - "step": 1259 - }, - { - "epoch": 3.33, - "learning_rate": 3.3780160857908846e-05, - "loss": 0.2076, - "step": 1260 - }, - { - "epoch": 3.34, - "learning_rate": 3.376675603217158e-05, - "loss": 0.0083, - "step": 1261 - }, - { - "epoch": 3.34, - "learning_rate": 3.375335120643432e-05, - "loss": 0.0234, - "step": 1262 - }, - { - "epoch": 3.34, - "learning_rate": 3.373994638069705e-05, - "loss": 0.0066, - "step": 1263 - }, - { - "epoch": 3.34, - "learning_rate": 3.372654155495979e-05, - "loss": 0.3983, - "step": 1264 - }, - { - "epoch": 3.35, - "learning_rate": 3.371313672922252e-05, - "loss": 0.0648, - "step": 1265 - }, - { - "epoch": 3.35, - "learning_rate": 3.369973190348526e-05, - "loss": 0.006, - "step": 1266 - }, - { - "epoch": 3.35, - "learning_rate": 3.368632707774799e-05, - "loss": 0.0807, - "step": 1267 - }, - { - "epoch": 3.35, - "learning_rate": 3.367292225201073e-05, - "loss": 0.0975, - "step": 1268 - }, - { - "epoch": 3.36, - "learning_rate": 3.365951742627346e-05, - "loss": 0.2934, - "step": 1269 - }, - { - "epoch": 3.36, - "learning_rate": 3.36461126005362e-05, - "loss": 0.0869, - "step": 1270 - }, - { - "epoch": 3.36, - "learning_rate": 3.3632707774798926e-05, - "loss": 0.1374, - "step": 1271 - }, - { - "epoch": 3.37, - "learning_rate": 3.361930294906167e-05, - "loss": 0.3314, - "step": 1272 - }, - { - "epoch": 3.37, - "learning_rate": 3.3605898123324396e-05, - "loss": 0.0045, - "step": 1273 - }, - { - "epoch": 3.37, - "learning_rate": 3.359249329758714e-05, - "loss": 0.0536, - "step": 1274 - }, - { - "epoch": 3.37, - "learning_rate": 3.3579088471849867e-05, - "loss": 0.0564, - "step": 1275 - }, - { - "epoch": 3.38, - "learning_rate": 3.35656836461126e-05, - "loss": 0.0689, - "step": 1276 - }, - { - "epoch": 3.38, - "learning_rate": 3.355227882037534e-05, - "loss": 0.5177, - "step": 1277 - }, - { - "epoch": 3.38, - "learning_rate": 3.353887399463807e-05, - "loss": 0.0689, - "step": 1278 - }, - { - "epoch": 3.38, - "learning_rate": 3.352546916890081e-05, - "loss": 0.0664, - "step": 1279 - }, - { - "epoch": 3.39, - "learning_rate": 3.351206434316354e-05, - "loss": 0.0614, - "step": 1280 - }, - { - "epoch": 3.39, - "learning_rate": 3.349865951742627e-05, - "loss": 0.1994, - "step": 1281 - }, - { - "epoch": 3.39, - "learning_rate": 3.348525469168901e-05, - "loss": 0.4769, - "step": 1282 - }, - { - "epoch": 3.39, - "learning_rate": 3.347184986595174e-05, - "loss": 0.1851, - "step": 1283 - }, - { - "epoch": 3.4, - "learning_rate": 3.345844504021448e-05, - "loss": 0.0092, - "step": 1284 - }, - { - "epoch": 3.4, - "learning_rate": 3.344504021447721e-05, - "loss": 0.0052, - "step": 1285 - }, - { - "epoch": 3.4, - "learning_rate": 3.3431635388739946e-05, - "loss": 0.0095, - "step": 1286 - }, - { - "epoch": 3.4, - "learning_rate": 3.341823056300268e-05, - "loss": 0.0242, - "step": 1287 - }, - { - "epoch": 3.41, - "learning_rate": 3.3404825737265416e-05, - "loss": 0.0565, - "step": 1288 - }, - { - "epoch": 3.41, - "learning_rate": 3.339142091152815e-05, - "loss": 0.2645, - "step": 1289 - }, - { - "epoch": 3.41, - "learning_rate": 3.337801608579089e-05, - "loss": 0.0049, - "step": 1290 - }, - { - "epoch": 3.42, - "learning_rate": 3.336461126005362e-05, - "loss": 0.0929, - "step": 1291 - }, - { - "epoch": 3.42, - "learning_rate": 3.335120643431636e-05, - "loss": 0.3968, - "step": 1292 - }, - { - "epoch": 3.42, - "learning_rate": 3.333780160857909e-05, - "loss": 0.033, - "step": 1293 - }, - { - "epoch": 3.42, - "learning_rate": 3.332439678284183e-05, - "loss": 0.007, - "step": 1294 - }, - { - "epoch": 3.43, - "learning_rate": 3.331099195710456e-05, - "loss": 0.2552, - "step": 1295 - }, - { - "epoch": 3.43, - "learning_rate": 3.329758713136729e-05, - "loss": 0.004, - "step": 1296 - }, - { - "epoch": 3.43, - "learning_rate": 3.328418230563003e-05, - "loss": 0.136, - "step": 1297 - }, - { - "epoch": 3.43, - "learning_rate": 3.327077747989276e-05, - "loss": 0.1407, - "step": 1298 - }, - { - "epoch": 3.44, - "learning_rate": 3.32573726541555e-05, - "loss": 0.0354, - "step": 1299 - }, - { - "epoch": 3.44, - "learning_rate": 3.324396782841823e-05, - "loss": 0.6141, - "step": 1300 - }, - { - "epoch": 3.44, - "learning_rate": 3.3230563002680966e-05, - "loss": 0.2544, - "step": 1301 - }, - { - "epoch": 3.44, - "learning_rate": 3.32171581769437e-05, - "loss": 0.0046, - "step": 1302 - }, - { - "epoch": 3.45, - "learning_rate": 3.320375335120644e-05, - "loss": 0.0126, - "step": 1303 - }, - { - "epoch": 3.45, - "learning_rate": 3.319034852546917e-05, - "loss": 0.3506, - "step": 1304 - }, - { - "epoch": 3.45, - "learning_rate": 3.317694369973191e-05, - "loss": 0.3512, - "step": 1305 - }, - { - "epoch": 3.46, - "learning_rate": 3.3163538873994635e-05, - "loss": 0.3675, - "step": 1306 - }, - { - "epoch": 3.46, - "learning_rate": 3.315013404825738e-05, - "loss": 0.1676, - "step": 1307 - }, - { - "epoch": 3.46, - "learning_rate": 3.3136729222520106e-05, - "loss": 0.0307, - "step": 1308 - }, - { - "epoch": 3.46, - "learning_rate": 3.312332439678285e-05, - "loss": 0.0084, - "step": 1309 - }, - { - "epoch": 3.47, - "learning_rate": 3.3109919571045576e-05, - "loss": 0.1977, - "step": 1310 - }, - { - "epoch": 3.47, - "learning_rate": 3.309651474530831e-05, - "loss": 0.1645, - "step": 1311 - }, - { - "epoch": 3.47, - "learning_rate": 3.3083109919571046e-05, - "loss": 0.2579, - "step": 1312 - }, - { - "epoch": 3.47, - "learning_rate": 3.306970509383378e-05, - "loss": 0.1656, - "step": 1313 - }, - { - "epoch": 3.48, - "learning_rate": 3.3056300268096516e-05, - "loss": 0.0168, - "step": 1314 - }, - { - "epoch": 3.48, - "learning_rate": 3.304289544235925e-05, - "loss": 0.0291, - "step": 1315 - }, - { - "epoch": 3.48, - "learning_rate": 3.302949061662198e-05, - "loss": 0.0146, - "step": 1316 - }, - { - "epoch": 3.48, - "learning_rate": 3.301608579088472e-05, - "loss": 0.0037, - "step": 1317 - }, - { - "epoch": 3.49, - "learning_rate": 3.300268096514745e-05, - "loss": 0.0113, - "step": 1318 - }, - { - "epoch": 3.49, - "learning_rate": 3.298927613941019e-05, - "loss": 0.0734, - "step": 1319 - }, - { - "epoch": 3.49, - "learning_rate": 3.297587131367292e-05, - "loss": 0.0292, - "step": 1320 - }, - { - "epoch": 3.49, - "learning_rate": 3.2962466487935655e-05, - "loss": 0.3875, - "step": 1321 - }, - { - "epoch": 3.5, - "learning_rate": 3.294906166219839e-05, - "loss": 0.0138, - "step": 1322 - }, - { - "epoch": 3.5, - "learning_rate": 3.2935656836461126e-05, - "loss": 0.4653, - "step": 1323 - }, - { - "epoch": 3.5, - "learning_rate": 3.292225201072386e-05, - "loss": 0.1864, - "step": 1324 - }, - { - "epoch": 3.51, - "learning_rate": 3.2908847184986596e-05, - "loss": 0.0116, - "step": 1325 - }, - { - "epoch": 3.51, - "learning_rate": 3.289544235924933e-05, - "loss": 0.014, - "step": 1326 - }, - { - "epoch": 3.51, - "learning_rate": 3.2882037533512066e-05, - "loss": 0.3344, - "step": 1327 - }, - { - "epoch": 3.51, - "learning_rate": 3.28686327077748e-05, - "loss": 0.1544, - "step": 1328 - }, - { - "epoch": 3.52, - "learning_rate": 3.2855227882037537e-05, - "loss": 0.0065, - "step": 1329 - }, - { - "epoch": 3.52, - "learning_rate": 3.284182305630027e-05, - "loss": 0.0041, - "step": 1330 - }, - { - "epoch": 3.52, - "learning_rate": 3.2828418230563e-05, - "loss": 0.0044, - "step": 1331 - }, - { - "epoch": 3.52, - "learning_rate": 3.281501340482574e-05, - "loss": 0.1808, - "step": 1332 - }, - { - "epoch": 3.53, - "learning_rate": 3.280160857908847e-05, - "loss": 0.0521, - "step": 1333 - }, - { - "epoch": 3.53, - "learning_rate": 3.278820375335121e-05, - "loss": 0.3505, - "step": 1334 - }, - { - "epoch": 3.53, - "learning_rate": 3.277479892761394e-05, - "loss": 0.2032, - "step": 1335 - }, - { - "epoch": 3.53, - "learning_rate": 3.2761394101876676e-05, - "loss": 0.004, - "step": 1336 - }, - { - "epoch": 3.54, - "learning_rate": 3.274798927613941e-05, - "loss": 0.0343, - "step": 1337 - }, - { - "epoch": 3.54, - "learning_rate": 3.2734584450402146e-05, - "loss": 0.278, - "step": 1338 - }, - { - "epoch": 3.54, - "learning_rate": 3.272117962466488e-05, - "loss": 0.0056, - "step": 1339 - }, - { - "epoch": 3.54, - "learning_rate": 3.2707774798927616e-05, - "loss": 0.1673, - "step": 1340 - }, - { - "epoch": 3.55, - "learning_rate": 3.2694369973190345e-05, - "loss": 0.0092, - "step": 1341 - }, - { - "epoch": 3.55, - "learning_rate": 3.2680965147453086e-05, - "loss": 0.0058, - "step": 1342 - }, - { - "epoch": 3.55, - "learning_rate": 3.2667560321715815e-05, - "loss": 0.097, - "step": 1343 - }, - { - "epoch": 3.56, - "learning_rate": 3.265415549597856e-05, - "loss": 0.2138, - "step": 1344 - }, - { - "epoch": 3.56, - "learning_rate": 3.2640750670241285e-05, - "loss": 0.0077, - "step": 1345 - }, - { - "epoch": 3.56, - "learning_rate": 3.262734584450402e-05, - "loss": 0.2294, - "step": 1346 - }, - { - "epoch": 3.56, - "learning_rate": 3.2613941018766755e-05, - "loss": 0.3282, - "step": 1347 - }, - { - "epoch": 3.57, - "learning_rate": 3.260053619302949e-05, - "loss": 0.233, - "step": 1348 - }, - { - "epoch": 3.57, - "learning_rate": 3.2587131367292226e-05, - "loss": 0.0379, - "step": 1349 - }, - { - "epoch": 3.57, - "learning_rate": 3.257372654155496e-05, - "loss": 0.2168, - "step": 1350 - }, - { - "epoch": 3.57, - "learning_rate": 3.2560321715817696e-05, - "loss": 0.0443, - "step": 1351 - }, - { - "epoch": 3.58, - "learning_rate": 3.254691689008043e-05, - "loss": 0.2665, - "step": 1352 - }, - { - "epoch": 3.58, - "learning_rate": 3.2533512064343166e-05, - "loss": 0.0136, - "step": 1353 - }, - { - "epoch": 3.58, - "learning_rate": 3.25201072386059e-05, - "loss": 0.0035, - "step": 1354 - }, - { - "epoch": 3.58, - "learning_rate": 3.2506702412868636e-05, - "loss": 0.2153, - "step": 1355 - }, - { - "epoch": 3.59, - "learning_rate": 3.249329758713137e-05, - "loss": 0.088, - "step": 1356 - }, - { - "epoch": 3.59, - "learning_rate": 3.247989276139411e-05, - "loss": 0.0074, - "step": 1357 - }, - { - "epoch": 3.59, - "learning_rate": 3.2466487935656835e-05, - "loss": 0.0924, - "step": 1358 - }, - { - "epoch": 3.6, - "learning_rate": 3.245308310991958e-05, - "loss": 0.0171, - "step": 1359 - }, - { - "epoch": 3.6, - "learning_rate": 3.2439678284182305e-05, - "loss": 0.0132, - "step": 1360 - }, - { - "epoch": 3.6, - "learning_rate": 3.242627345844505e-05, - "loss": 0.0583, - "step": 1361 - }, - { - "epoch": 3.6, - "learning_rate": 3.2412868632707776e-05, - "loss": 0.0038, - "step": 1362 - }, - { - "epoch": 3.61, - "learning_rate": 3.239946380697051e-05, - "loss": 0.0846, - "step": 1363 - }, - { - "epoch": 3.61, - "learning_rate": 3.2386058981233246e-05, - "loss": 0.0058, - "step": 1364 - }, - { - "epoch": 3.61, - "learning_rate": 3.237265415549598e-05, - "loss": 0.4456, - "step": 1365 - }, - { - "epoch": 3.61, - "learning_rate": 3.2359249329758716e-05, - "loss": 0.0029, - "step": 1366 - }, - { - "epoch": 3.62, - "learning_rate": 3.234584450402145e-05, - "loss": 0.2553, - "step": 1367 - }, - { - "epoch": 3.62, - "learning_rate": 3.233243967828418e-05, - "loss": 0.0936, - "step": 1368 - }, - { - "epoch": 3.62, - "learning_rate": 3.231903485254692e-05, - "loss": 0.1017, - "step": 1369 - }, - { - "epoch": 3.62, - "learning_rate": 3.230563002680965e-05, - "loss": 0.0379, - "step": 1370 - }, - { - "epoch": 3.63, - "learning_rate": 3.229222520107239e-05, - "loss": 0.0069, - "step": 1371 - }, - { - "epoch": 3.63, - "learning_rate": 3.227882037533512e-05, - "loss": 0.3235, - "step": 1372 - }, - { - "epoch": 3.63, - "learning_rate": 3.2265415549597855e-05, - "loss": 0.3796, - "step": 1373 - }, - { - "epoch": 3.63, - "learning_rate": 3.225201072386059e-05, - "loss": 0.3246, - "step": 1374 - }, - { - "epoch": 3.64, - "learning_rate": 3.2238605898123325e-05, - "loss": 0.0059, - "step": 1375 - }, - { - "epoch": 3.64, - "learning_rate": 3.222520107238606e-05, - "loss": 0.0405, - "step": 1376 - }, - { - "epoch": 3.64, - "learning_rate": 3.2211796246648796e-05, - "loss": 0.0142, - "step": 1377 - }, - { - "epoch": 3.65, - "learning_rate": 3.219839142091153e-05, - "loss": 0.4426, - "step": 1378 - }, - { - "epoch": 3.65, - "learning_rate": 3.2184986595174266e-05, - "loss": 0.0249, - "step": 1379 - }, - { - "epoch": 3.65, - "learning_rate": 3.2171581769437e-05, - "loss": 0.1053, - "step": 1380 - }, - { - "epoch": 3.65, - "learning_rate": 3.2158176943699736e-05, - "loss": 0.0179, - "step": 1381 - }, - { - "epoch": 3.66, - "learning_rate": 3.214477211796247e-05, - "loss": 0.0718, - "step": 1382 - }, - { - "epoch": 3.66, - "learning_rate": 3.21313672922252e-05, - "loss": 0.1431, - "step": 1383 - }, - { - "epoch": 3.66, - "learning_rate": 3.211796246648794e-05, - "loss": 0.2391, - "step": 1384 - }, - { - "epoch": 3.66, - "learning_rate": 3.210455764075067e-05, - "loss": 0.0053, - "step": 1385 - }, - { - "epoch": 3.67, - "learning_rate": 3.209115281501341e-05, - "loss": 0.2935, - "step": 1386 - }, - { - "epoch": 3.67, - "learning_rate": 3.207774798927614e-05, - "loss": 0.0071, - "step": 1387 - }, - { - "epoch": 3.67, - "learning_rate": 3.2064343163538875e-05, - "loss": 0.031, - "step": 1388 - }, - { - "epoch": 3.67, - "learning_rate": 3.205093833780161e-05, - "loss": 0.1989, - "step": 1389 - }, - { - "epoch": 3.68, - "learning_rate": 3.2037533512064346e-05, - "loss": 0.0533, - "step": 1390 - }, - { - "epoch": 3.68, - "learning_rate": 3.202412868632708e-05, - "loss": 0.2408, - "step": 1391 - }, - { - "epoch": 3.68, - "learning_rate": 3.2010723860589816e-05, - "loss": 0.3158, - "step": 1392 - }, - { - "epoch": 3.69, - "learning_rate": 3.1997319034852544e-05, - "loss": 0.3629, - "step": 1393 - }, - { - "epoch": 3.69, - "learning_rate": 3.1983914209115286e-05, - "loss": 0.0122, - "step": 1394 - }, - { - "epoch": 3.69, - "learning_rate": 3.1970509383378014e-05, - "loss": 0.0449, - "step": 1395 - }, - { - "epoch": 3.69, - "learning_rate": 3.1957104557640756e-05, - "loss": 0.1273, - "step": 1396 - }, - { - "epoch": 3.7, - "learning_rate": 3.1943699731903485e-05, - "loss": 0.3401, - "step": 1397 - }, - { - "epoch": 3.7, - "learning_rate": 3.193029490616622e-05, - "loss": 0.0183, - "step": 1398 - }, - { - "epoch": 3.7, - "learning_rate": 3.1916890080428955e-05, - "loss": 0.0526, - "step": 1399 - }, - { - "epoch": 3.7, - "learning_rate": 3.190348525469169e-05, - "loss": 0.5037, - "step": 1400 - }, - { - "epoch": 3.71, - "learning_rate": 3.1890080428954425e-05, - "loss": 0.0059, - "step": 1401 - }, - { - "epoch": 3.71, - "learning_rate": 3.187667560321716e-05, - "loss": 0.0266, - "step": 1402 - }, - { - "epoch": 3.71, - "learning_rate": 3.1863270777479896e-05, - "loss": 0.4095, - "step": 1403 - }, - { - "epoch": 3.71, - "learning_rate": 3.184986595174263e-05, - "loss": 0.1802, - "step": 1404 - }, - { - "epoch": 3.72, - "learning_rate": 3.1836461126005366e-05, - "loss": 0.3586, - "step": 1405 - }, - { - "epoch": 3.72, - "learning_rate": 3.18230563002681e-05, - "loss": 0.2058, - "step": 1406 - }, - { - "epoch": 3.72, - "learning_rate": 3.1809651474530836e-05, - "loss": 0.008, - "step": 1407 - }, - { - "epoch": 3.72, - "learning_rate": 3.1796246648793564e-05, - "loss": 0.0282, - "step": 1408 - }, - { - "epoch": 3.73, - "learning_rate": 3.1782841823056306e-05, - "loss": 0.0077, - "step": 1409 - }, - { - "epoch": 3.73, - "learning_rate": 3.1769436997319035e-05, - "loss": 0.3461, - "step": 1410 - }, - { - "epoch": 3.73, - "learning_rate": 3.1756032171581777e-05, - "loss": 0.0038, - "step": 1411 - }, - { - "epoch": 3.74, - "learning_rate": 3.1742627345844505e-05, - "loss": 0.0087, - "step": 1412 - }, - { - "epoch": 3.74, - "learning_rate": 3.172922252010724e-05, - "loss": 0.8254, - "step": 1413 - }, - { - "epoch": 3.74, - "learning_rate": 3.1715817694369975e-05, - "loss": 0.017, - "step": 1414 - }, - { - "epoch": 3.74, - "learning_rate": 3.170241286863271e-05, - "loss": 0.2954, - "step": 1415 - }, - { - "epoch": 3.75, - "learning_rate": 3.1689008042895445e-05, - "loss": 0.0286, - "step": 1416 - }, - { - "epoch": 3.75, - "learning_rate": 3.167560321715818e-05, - "loss": 0.0454, - "step": 1417 - }, - { - "epoch": 3.75, - "learning_rate": 3.166219839142091e-05, - "loss": 0.222, - "step": 1418 - }, - { - "epoch": 3.75, - "learning_rate": 3.164879356568365e-05, - "loss": 0.0225, - "step": 1419 - }, - { - "epoch": 3.76, - "learning_rate": 3.163538873994638e-05, - "loss": 0.2599, - "step": 1420 - }, - { - "epoch": 3.76, - "learning_rate": 3.162198391420912e-05, - "loss": 0.2343, - "step": 1421 - }, - { - "epoch": 3.76, - "learning_rate": 3.160857908847185e-05, - "loss": 0.0274, - "step": 1422 - }, - { - "epoch": 3.76, - "learning_rate": 3.1595174262734585e-05, - "loss": 0.0109, - "step": 1423 - }, - { - "epoch": 3.77, - "learning_rate": 3.158176943699732e-05, - "loss": 0.012, - "step": 1424 - }, - { - "epoch": 3.77, - "learning_rate": 3.1568364611260055e-05, - "loss": 0.0267, - "step": 1425 - }, - { - "epoch": 3.77, - "learning_rate": 3.155495978552279e-05, - "loss": 0.0116, - "step": 1426 - }, - { - "epoch": 3.78, - "learning_rate": 3.1541554959785525e-05, - "loss": 0.2563, - "step": 1427 - }, - { - "epoch": 3.78, - "learning_rate": 3.1528150134048253e-05, - "loss": 0.2149, - "step": 1428 - }, - { - "epoch": 3.78, - "learning_rate": 3.1514745308310995e-05, - "loss": 0.2099, - "step": 1429 - }, - { - "epoch": 3.78, - "learning_rate": 3.1501340482573724e-05, - "loss": 0.1445, - "step": 1430 - }, - { - "epoch": 3.79, - "learning_rate": 3.1487935656836466e-05, - "loss": 0.0069, - "step": 1431 - }, - { - "epoch": 3.79, - "learning_rate": 3.1474530831099194e-05, - "loss": 0.3583, - "step": 1432 - }, - { - "epoch": 3.79, - "learning_rate": 3.146112600536193e-05, - "loss": 0.1112, - "step": 1433 - }, - { - "epoch": 3.79, - "learning_rate": 3.1447721179624664e-05, - "loss": 0.5379, - "step": 1434 - }, - { - "epoch": 3.8, - "learning_rate": 3.14343163538874e-05, - "loss": 0.0248, - "step": 1435 - }, - { - "epoch": 3.8, - "learning_rate": 3.1420911528150135e-05, - "loss": 0.0255, - "step": 1436 - }, - { - "epoch": 3.8, - "learning_rate": 3.140750670241287e-05, - "loss": 0.3363, - "step": 1437 - }, - { - "epoch": 3.8, - "learning_rate": 3.1394101876675605e-05, - "loss": 0.2952, - "step": 1438 - }, - { - "epoch": 3.81, - "learning_rate": 3.138069705093834e-05, - "loss": 0.0337, - "step": 1439 - }, - { - "epoch": 3.81, - "learning_rate": 3.1367292225201075e-05, - "loss": 0.0157, - "step": 1440 - }, - { - "epoch": 3.81, - "learning_rate": 3.135388739946381e-05, - "loss": 0.0204, - "step": 1441 - }, - { - "epoch": 3.81, - "learning_rate": 3.1340482573726545e-05, - "loss": 0.7707, - "step": 1442 - }, - { - "epoch": 3.82, - "learning_rate": 3.1327077747989274e-05, - "loss": 0.4232, - "step": 1443 - }, - { - "epoch": 3.82, - "learning_rate": 3.1313672922252016e-05, - "loss": 0.116, - "step": 1444 - }, - { - "epoch": 3.82, - "learning_rate": 3.1300268096514744e-05, - "loss": 0.421, - "step": 1445 - }, - { - "epoch": 3.83, - "learning_rate": 3.1286863270777486e-05, - "loss": 0.0267, - "step": 1446 - }, - { - "epoch": 3.83, - "learning_rate": 3.1273458445040214e-05, - "loss": 0.0078, - "step": 1447 - }, - { - "epoch": 3.83, - "learning_rate": 3.126005361930295e-05, - "loss": 0.0996, - "step": 1448 - }, - { - "epoch": 3.83, - "learning_rate": 3.1246648793565684e-05, - "loss": 0.0389, - "step": 1449 - }, - { - "epoch": 3.84, - "learning_rate": 3.123324396782842e-05, - "loss": 0.0482, - "step": 1450 - }, - { - "epoch": 3.84, - "learning_rate": 3.1219839142091155e-05, - "loss": 0.0053, - "step": 1451 - }, - { - "epoch": 3.84, - "learning_rate": 3.120643431635389e-05, - "loss": 0.0153, - "step": 1452 - }, - { - "epoch": 3.84, - "learning_rate": 3.119302949061662e-05, - "loss": 0.008, - "step": 1453 - }, - { - "epoch": 3.85, - "learning_rate": 3.117962466487936e-05, - "loss": 0.0166, - "step": 1454 - }, - { - "epoch": 3.85, - "learning_rate": 3.116621983914209e-05, - "loss": 0.0889, - "step": 1455 - }, - { - "epoch": 3.85, - "learning_rate": 3.115281501340483e-05, - "loss": 0.0695, - "step": 1456 - }, - { - "epoch": 3.85, - "learning_rate": 3.113941018766756e-05, - "loss": 0.3353, - "step": 1457 - }, - { - "epoch": 3.86, - "learning_rate": 3.1126005361930294e-05, - "loss": 0.0729, - "step": 1458 - }, - { - "epoch": 3.86, - "learning_rate": 3.111260053619303e-05, - "loss": 0.0187, - "step": 1459 - }, - { - "epoch": 3.86, - "learning_rate": 3.1099195710455764e-05, - "loss": 0.2512, - "step": 1460 - }, - { - "epoch": 3.87, - "learning_rate": 3.10857908847185e-05, - "loss": 0.3837, - "step": 1461 - }, - { - "epoch": 3.87, - "learning_rate": 3.1072386058981234e-05, - "loss": 0.2543, - "step": 1462 - }, - { - "epoch": 3.87, - "learning_rate": 3.105898123324397e-05, - "loss": 0.1797, - "step": 1463 - }, - { - "epoch": 3.87, - "learning_rate": 3.1045576407506705e-05, - "loss": 0.3097, - "step": 1464 - }, - { - "epoch": 3.88, - "learning_rate": 3.103217158176944e-05, - "loss": 0.268, - "step": 1465 - }, - { - "epoch": 3.88, - "learning_rate": 3.1018766756032175e-05, - "loss": 0.1773, - "step": 1466 - }, - { - "epoch": 3.88, - "learning_rate": 3.100536193029491e-05, - "loss": 0.2055, - "step": 1467 - }, - { - "epoch": 3.88, - "learning_rate": 3.099195710455764e-05, - "loss": 0.0279, - "step": 1468 - }, - { - "epoch": 3.89, - "learning_rate": 3.097855227882038e-05, - "loss": 0.1263, - "step": 1469 - }, - { - "epoch": 3.89, - "learning_rate": 3.096514745308311e-05, - "loss": 0.0449, - "step": 1470 - }, - { - "epoch": 3.89, - "learning_rate": 3.095174262734585e-05, - "loss": 0.2429, - "step": 1471 - }, - { - "epoch": 3.89, - "learning_rate": 3.093833780160858e-05, - "loss": 0.1245, - "step": 1472 - }, - { - "epoch": 3.9, - "learning_rate": 3.0924932975871314e-05, - "loss": 0.1303, - "step": 1473 - }, - { - "epoch": 3.9, - "learning_rate": 3.091152815013405e-05, - "loss": 0.0303, - "step": 1474 - }, - { - "epoch": 3.9, - "learning_rate": 3.0898123324396784e-05, - "loss": 0.3279, - "step": 1475 - }, - { - "epoch": 3.9, - "learning_rate": 3.088471849865952e-05, - "loss": 0.134, - "step": 1476 - }, - { - "epoch": 3.91, - "learning_rate": 3.0871313672922255e-05, - "loss": 0.5138, - "step": 1477 - }, - { - "epoch": 3.91, - "learning_rate": 3.085790884718498e-05, - "loss": 0.0476, - "step": 1478 - }, - { - "epoch": 3.91, - "learning_rate": 3.0844504021447725e-05, - "loss": 0.1956, - "step": 1479 - }, - { - "epoch": 3.92, - "learning_rate": 3.083109919571045e-05, - "loss": 0.2061, - "step": 1480 - }, - { - "epoch": 3.92, - "learning_rate": 3.0817694369973195e-05, - "loss": 0.269, - "step": 1481 - }, - { - "epoch": 3.92, - "learning_rate": 3.0804289544235923e-05, - "loss": 0.0708, - "step": 1482 - }, - { - "epoch": 3.92, - "learning_rate": 3.0790884718498665e-05, - "loss": 0.0389, - "step": 1483 - }, - { - "epoch": 3.93, - "learning_rate": 3.0777479892761394e-05, - "loss": 0.2566, - "step": 1484 - }, - { - "epoch": 3.93, - "learning_rate": 3.076407506702413e-05, - "loss": 0.0581, - "step": 1485 - }, - { - "epoch": 3.93, - "learning_rate": 3.0750670241286864e-05, - "loss": 0.1527, - "step": 1486 - }, - { - "epoch": 3.93, - "learning_rate": 3.07372654155496e-05, - "loss": 0.3963, - "step": 1487 - }, - { - "epoch": 3.94, - "learning_rate": 3.0723860589812334e-05, - "loss": 0.2241, - "step": 1488 - }, - { - "epoch": 3.94, - "learning_rate": 3.071045576407507e-05, - "loss": 0.1275, - "step": 1489 - }, - { - "epoch": 3.94, - "learning_rate": 3.0697050938337804e-05, - "loss": 0.3148, - "step": 1490 - }, - { - "epoch": 3.94, - "learning_rate": 3.068364611260054e-05, - "loss": 0.1474, - "step": 1491 - }, - { - "epoch": 3.95, - "learning_rate": 3.0670241286863275e-05, - "loss": 0.0233, - "step": 1492 - }, - { - "epoch": 3.95, - "learning_rate": 3.065683646112601e-05, - "loss": 0.1721, - "step": 1493 - }, - { - "epoch": 3.95, - "learning_rate": 3.0643431635388745e-05, - "loss": 0.6024, - "step": 1494 - }, - { - "epoch": 3.96, - "learning_rate": 3.063002680965147e-05, - "loss": 0.1425, - "step": 1495 - }, - { - "epoch": 3.96, - "learning_rate": 3.0616621983914215e-05, - "loss": 0.0311, - "step": 1496 - }, - { - "epoch": 3.96, - "learning_rate": 3.0603217158176944e-05, - "loss": 0.0197, - "step": 1497 - }, - { - "epoch": 3.96, - "learning_rate": 3.0589812332439686e-05, - "loss": 0.0406, - "step": 1498 - }, - { - "epoch": 3.97, - "learning_rate": 3.0576407506702414e-05, - "loss": 0.054, - "step": 1499 - }, - { - "epoch": 3.97, - "learning_rate": 3.056300268096515e-05, - "loss": 0.161, - "step": 1500 - }, - { - "epoch": 3.97, - "learning_rate": 3.0549597855227884e-05, - "loss": 0.0549, - "step": 1501 - }, - { - "epoch": 3.97, - "learning_rate": 3.053619302949062e-05, - "loss": 0.1667, - "step": 1502 - }, - { - "epoch": 3.98, - "learning_rate": 3.0522788203753354e-05, - "loss": 0.1264, - "step": 1503 - }, - { - "epoch": 3.98, - "learning_rate": 3.0509383378016086e-05, - "loss": 0.0133, - "step": 1504 - }, - { - "epoch": 3.98, - "learning_rate": 3.049597855227882e-05, - "loss": 0.0655, - "step": 1505 - }, - { - "epoch": 3.98, - "learning_rate": 3.0482573726541556e-05, - "loss": 0.1054, - "step": 1506 - }, - { - "epoch": 3.99, - "learning_rate": 3.046916890080429e-05, - "loss": 0.0053, - "step": 1507 - }, - { - "epoch": 3.99, - "learning_rate": 3.0455764075067027e-05, - "loss": 0.0347, - "step": 1508 - }, - { - "epoch": 3.99, - "learning_rate": 3.0442359249329762e-05, - "loss": 0.6095, - "step": 1509 - }, - { - "epoch": 3.99, - "learning_rate": 3.0428954423592494e-05, - "loss": 0.1339, - "step": 1510 - }, - { - "epoch": 4.0, - "learning_rate": 3.0415549597855232e-05, - "loss": 0.0088, - "step": 1511 - }, - { - "epoch": 4.0, - "learning_rate": 3.0402144772117964e-05, - "loss": 0.4356, - "step": 1512 - }, - { - "epoch": 4.0, - "eval_f1": 0.7822580645161291, - "eval_loss": 0.6966613531112671, - "eval_runtime": 1.8703, - "eval_samples_per_second": 808.957, - "eval_steps_per_second": 50.794, - "step": 1512 - }, - { - "epoch": 4.0, - "learning_rate": 3.0388739946380702e-05, - "loss": 0.003, - "step": 1513 - }, - { - "epoch": 4.01, - "learning_rate": 3.0375335120643434e-05, - "loss": 0.0067, - "step": 1514 - }, - { - "epoch": 4.01, - "learning_rate": 3.0361930294906166e-05, - "loss": 0.0488, - "step": 1515 - }, - { - "epoch": 4.01, - "learning_rate": 3.0348525469168904e-05, - "loss": 0.0106, - "step": 1516 - }, - { - "epoch": 4.01, - "learning_rate": 3.0335120643431636e-05, - "loss": 0.0098, - "step": 1517 - }, - { - "epoch": 4.02, - "learning_rate": 3.0321715817694375e-05, - "loss": 0.274, - "step": 1518 - }, - { - "epoch": 4.02, - "learning_rate": 3.0308310991957106e-05, - "loss": 0.2007, - "step": 1519 - }, - { - "epoch": 4.02, - "learning_rate": 3.0294906166219838e-05, - "loss": 0.0121, - "step": 1520 - }, - { - "epoch": 4.02, - "learning_rate": 3.0281501340482577e-05, - "loss": 0.0632, - "step": 1521 - }, - { - "epoch": 4.03, - "learning_rate": 3.026809651474531e-05, - "loss": 0.0062, - "step": 1522 - }, - { - "epoch": 4.03, - "learning_rate": 3.0254691689008047e-05, - "loss": 0.0123, - "step": 1523 - }, - { - "epoch": 4.03, - "learning_rate": 3.024128686327078e-05, - "loss": 0.0063, - "step": 1524 - }, - { - "epoch": 4.03, - "learning_rate": 3.022788203753351e-05, - "loss": 0.0102, - "step": 1525 - }, - { - "epoch": 4.04, - "learning_rate": 3.021447721179625e-05, - "loss": 0.0082, - "step": 1526 - }, - { - "epoch": 4.04, - "learning_rate": 3.020107238605898e-05, - "loss": 0.3369, - "step": 1527 - }, - { - "epoch": 4.04, - "learning_rate": 3.018766756032172e-05, - "loss": 0.2587, - "step": 1528 - }, - { - "epoch": 4.04, - "learning_rate": 3.017426273458445e-05, - "loss": 0.0067, - "step": 1529 - }, - { - "epoch": 4.05, - "learning_rate": 3.0160857908847186e-05, - "loss": 0.0021, - "step": 1530 - }, - { - "epoch": 4.05, - "learning_rate": 3.014745308310992e-05, - "loss": 0.0724, - "step": 1531 - }, - { - "epoch": 4.05, - "learning_rate": 3.0134048257372656e-05, - "loss": 0.0074, - "step": 1532 - }, - { - "epoch": 4.06, - "learning_rate": 3.012064343163539e-05, - "loss": 0.0202, - "step": 1533 - }, - { - "epoch": 4.06, - "learning_rate": 3.0107238605898126e-05, - "loss": 0.1435, - "step": 1534 - }, - { - "epoch": 4.06, - "learning_rate": 3.0093833780160858e-05, - "loss": 0.0074, - "step": 1535 - }, - { - "epoch": 4.06, - "learning_rate": 3.0080428954423597e-05, - "loss": 0.4145, - "step": 1536 - }, - { - "epoch": 4.07, - "learning_rate": 3.006702412868633e-05, - "loss": 0.0186, - "step": 1537 - }, - { - "epoch": 4.07, - "learning_rate": 3.0053619302949067e-05, - "loss": 0.1648, - "step": 1538 - }, - { - "epoch": 4.07, - "learning_rate": 3.00402144772118e-05, - "loss": 0.2545, - "step": 1539 - }, - { - "epoch": 4.07, - "learning_rate": 3.002680965147453e-05, - "loss": 0.0016, - "step": 1540 - }, - { - "epoch": 4.08, - "learning_rate": 3.001340482573727e-05, - "loss": 0.0184, - "step": 1541 - }, - { - "epoch": 4.08, - "learning_rate": 3e-05, - "loss": 0.1208, - "step": 1542 - }, - { - "epoch": 4.08, - "learning_rate": 2.998659517426274e-05, - "loss": 0.0021, - "step": 1543 - }, - { - "epoch": 4.08, - "learning_rate": 2.997319034852547e-05, - "loss": 0.0092, - "step": 1544 - }, - { - "epoch": 4.09, - "learning_rate": 2.9959785522788203e-05, - "loss": 0.1514, - "step": 1545 - }, - { - "epoch": 4.09, - "learning_rate": 2.994638069705094e-05, - "loss": 0.0773, - "step": 1546 - }, - { - "epoch": 4.09, - "learning_rate": 2.9932975871313673e-05, - "loss": 0.0093, - "step": 1547 - }, - { - "epoch": 4.1, - "learning_rate": 2.991957104557641e-05, - "loss": 0.0022, - "step": 1548 - }, - { - "epoch": 4.1, - "learning_rate": 2.9906166219839143e-05, - "loss": 0.1765, - "step": 1549 - }, - { - "epoch": 4.1, - "learning_rate": 2.9892761394101875e-05, - "loss": 0.1766, - "step": 1550 - }, - { - "epoch": 4.1, - "learning_rate": 2.9879356568364614e-05, - "loss": 0.0024, - "step": 1551 - }, - { - "epoch": 4.11, - "learning_rate": 2.9865951742627345e-05, - "loss": 0.012, - "step": 1552 - }, - { - "epoch": 4.11, - "learning_rate": 2.9852546916890084e-05, - "loss": 0.0055, - "step": 1553 - }, - { - "epoch": 4.11, - "learning_rate": 2.9839142091152816e-05, - "loss": 0.0088, - "step": 1554 - }, - { - "epoch": 4.11, - "learning_rate": 2.9825737265415547e-05, - "loss": 0.0019, - "step": 1555 - }, - { - "epoch": 4.12, - "learning_rate": 2.9812332439678286e-05, - "loss": 0.0186, - "step": 1556 - }, - { - "epoch": 4.12, - "learning_rate": 2.9798927613941018e-05, - "loss": 0.25, - "step": 1557 - }, - { - "epoch": 4.12, - "learning_rate": 2.9785522788203756e-05, - "loss": 0.0129, - "step": 1558 - }, - { - "epoch": 4.12, - "learning_rate": 2.9772117962466488e-05, - "loss": 0.0048, - "step": 1559 - }, - { - "epoch": 4.13, - "learning_rate": 2.9758713136729223e-05, - "loss": 0.1153, - "step": 1560 - }, - { - "epoch": 4.13, - "learning_rate": 2.9745308310991958e-05, - "loss": 0.1871, - "step": 1561 - }, - { - "epoch": 4.13, - "learning_rate": 2.9731903485254693e-05, - "loss": 0.0087, - "step": 1562 - }, - { - "epoch": 4.13, - "learning_rate": 2.971849865951743e-05, - "loss": 0.0048, - "step": 1563 - }, - { - "epoch": 4.14, - "learning_rate": 2.9705093833780163e-05, - "loss": 0.026, - "step": 1564 - }, - { - "epoch": 4.14, - "learning_rate": 2.9691689008042895e-05, - "loss": 0.3336, - "step": 1565 - }, - { - "epoch": 4.14, - "learning_rate": 2.9678284182305634e-05, - "loss": 0.0015, - "step": 1566 - }, - { - "epoch": 4.15, - "learning_rate": 2.9664879356568365e-05, - "loss": 0.0044, - "step": 1567 - }, - { - "epoch": 4.15, - "learning_rate": 2.9651474530831104e-05, - "loss": 0.0035, - "step": 1568 - }, - { - "epoch": 4.15, - "learning_rate": 2.9638069705093836e-05, - "loss": 0.1206, - "step": 1569 - }, - { - "epoch": 4.15, - "learning_rate": 2.9624664879356567e-05, - "loss": 0.1247, - "step": 1570 - }, - { - "epoch": 4.16, - "learning_rate": 2.9611260053619306e-05, - "loss": 0.0011, - "step": 1571 - }, - { - "epoch": 4.16, - "learning_rate": 2.9597855227882038e-05, - "loss": 0.0023, - "step": 1572 - }, - { - "epoch": 4.16, - "learning_rate": 2.9584450402144776e-05, - "loss": 0.0014, - "step": 1573 - }, - { - "epoch": 4.16, - "learning_rate": 2.9571045576407508e-05, - "loss": 0.2967, - "step": 1574 - }, - { - "epoch": 4.17, - "learning_rate": 2.955764075067024e-05, - "loss": 0.0373, - "step": 1575 - }, - { - "epoch": 4.17, - "learning_rate": 2.9544235924932978e-05, - "loss": 0.3351, - "step": 1576 - }, - { - "epoch": 4.17, - "learning_rate": 2.953083109919571e-05, - "loss": 0.0025, - "step": 1577 - }, - { - "epoch": 4.17, - "learning_rate": 2.951742627345845e-05, - "loss": 0.0025, - "step": 1578 - }, - { - "epoch": 4.18, - "learning_rate": 2.950402144772118e-05, - "loss": 0.0182, - "step": 1579 - }, - { - "epoch": 4.18, - "learning_rate": 2.9490616621983912e-05, - "loss": 0.001, - "step": 1580 - }, - { - "epoch": 4.18, - "learning_rate": 2.947721179624665e-05, - "loss": 0.003, - "step": 1581 - }, - { - "epoch": 4.19, - "learning_rate": 2.9463806970509382e-05, - "loss": 0.0038, - "step": 1582 - }, - { - "epoch": 4.19, - "learning_rate": 2.945040214477212e-05, - "loss": 0.002, - "step": 1583 - }, - { - "epoch": 4.19, - "learning_rate": 2.9436997319034853e-05, - "loss": 0.1688, - "step": 1584 - }, - { - "epoch": 4.19, - "learning_rate": 2.9423592493297584e-05, - "loss": 0.0014, - "step": 1585 - }, - { - "epoch": 4.2, - "learning_rate": 2.9410187667560323e-05, - "loss": 0.2664, - "step": 1586 - }, - { - "epoch": 4.2, - "learning_rate": 2.9396782841823055e-05, - "loss": 0.0012, - "step": 1587 - }, - { - "epoch": 4.2, - "learning_rate": 2.9383378016085793e-05, - "loss": 0.0022, - "step": 1588 - }, - { - "epoch": 4.2, - "learning_rate": 2.9369973190348525e-05, - "loss": 0.0959, - "step": 1589 - }, - { - "epoch": 4.21, - "learning_rate": 2.935656836461126e-05, - "loss": 0.0839, - "step": 1590 - }, - { - "epoch": 4.21, - "learning_rate": 2.9343163538873995e-05, - "loss": 0.7405, - "step": 1591 - }, - { - "epoch": 4.21, - "learning_rate": 2.932975871313673e-05, - "loss": 0.0351, - "step": 1592 - }, - { - "epoch": 4.21, - "learning_rate": 2.9316353887399465e-05, - "loss": 0.0025, - "step": 1593 - }, - { - "epoch": 4.22, - "learning_rate": 2.93029490616622e-05, - "loss": 0.0054, - "step": 1594 - }, - { - "epoch": 4.22, - "learning_rate": 2.9289544235924932e-05, - "loss": 0.0043, - "step": 1595 - }, - { - "epoch": 4.22, - "learning_rate": 2.927613941018767e-05, - "loss": 0.1828, - "step": 1596 - }, - { - "epoch": 4.22, - "learning_rate": 2.9262734584450402e-05, - "loss": 0.0022, - "step": 1597 - }, - { - "epoch": 4.23, - "learning_rate": 2.924932975871314e-05, - "loss": 0.0051, - "step": 1598 - }, - { - "epoch": 4.23, - "learning_rate": 2.9235924932975873e-05, - "loss": 0.0025, - "step": 1599 - }, - { - "epoch": 4.23, - "learning_rate": 2.9222520107238604e-05, - "loss": 0.0018, - "step": 1600 - }, - { - "epoch": 4.24, - "learning_rate": 2.9209115281501343e-05, - "loss": 0.0348, - "step": 1601 - }, - { - "epoch": 4.24, - "learning_rate": 2.9195710455764075e-05, - "loss": 0.207, - "step": 1602 - }, - { - "epoch": 4.24, - "learning_rate": 2.9182305630026813e-05, - "loss": 0.0249, - "step": 1603 - }, - { - "epoch": 4.24, - "learning_rate": 2.9168900804289545e-05, - "loss": 0.0028, - "step": 1604 - }, - { - "epoch": 4.25, - "learning_rate": 2.9155495978552283e-05, - "loss": 0.2604, - "step": 1605 - }, - { - "epoch": 4.25, - "learning_rate": 2.9142091152815015e-05, - "loss": 0.2808, - "step": 1606 - }, - { - "epoch": 4.25, - "learning_rate": 2.9128686327077747e-05, - "loss": 0.0289, - "step": 1607 - }, - { - "epoch": 4.25, - "learning_rate": 2.9115281501340486e-05, - "loss": 0.005, - "step": 1608 - }, - { - "epoch": 4.26, - "learning_rate": 2.9101876675603217e-05, - "loss": 0.7931, - "step": 1609 - }, - { - "epoch": 4.26, - "learning_rate": 2.9088471849865956e-05, - "loss": 0.335, - "step": 1610 - }, - { - "epoch": 4.26, - "learning_rate": 2.9075067024128688e-05, - "loss": 0.2779, - "step": 1611 - }, - { - "epoch": 4.26, - "learning_rate": 2.906166219839142e-05, - "loss": 0.1649, - "step": 1612 - }, - { - "epoch": 4.27, - "learning_rate": 2.9048257372654158e-05, - "loss": 0.0081, - "step": 1613 - }, - { - "epoch": 4.27, - "learning_rate": 2.903485254691689e-05, - "loss": 0.0638, - "step": 1614 - }, - { - "epoch": 4.27, - "learning_rate": 2.9021447721179628e-05, - "loss": 0.016, - "step": 1615 - }, - { - "epoch": 4.28, - "learning_rate": 2.900804289544236e-05, - "loss": 0.0025, - "step": 1616 - }, - { - "epoch": 4.28, - "learning_rate": 2.8994638069705095e-05, - "loss": 0.0249, - "step": 1617 - }, - { - "epoch": 4.28, - "learning_rate": 2.898123324396783e-05, - "loss": 0.0291, - "step": 1618 - }, - { - "epoch": 4.28, - "learning_rate": 2.8967828418230565e-05, - "loss": 0.1773, - "step": 1619 - }, - { - "epoch": 4.29, - "learning_rate": 2.89544235924933e-05, - "loss": 0.3452, - "step": 1620 - }, - { - "epoch": 4.29, - "learning_rate": 2.8941018766756035e-05, - "loss": 0.006, - "step": 1621 - }, - { - "epoch": 4.29, - "learning_rate": 2.8927613941018767e-05, - "loss": 0.0054, - "step": 1622 - }, - { - "epoch": 4.29, - "learning_rate": 2.8914209115281506e-05, - "loss": 0.1852, - "step": 1623 - }, - { - "epoch": 4.3, - "learning_rate": 2.8900804289544237e-05, - "loss": 0.4424, - "step": 1624 - }, - { - "epoch": 4.3, - "learning_rate": 2.8887399463806976e-05, - "loss": 0.0063, - "step": 1625 - }, - { - "epoch": 4.3, - "learning_rate": 2.8873994638069708e-05, - "loss": 0.43, - "step": 1626 - }, - { - "epoch": 4.3, - "learning_rate": 2.886058981233244e-05, - "loss": 0.2283, - "step": 1627 - }, - { - "epoch": 4.31, - "learning_rate": 2.8847184986595178e-05, - "loss": 0.0519, - "step": 1628 - }, - { - "epoch": 4.31, - "learning_rate": 2.883378016085791e-05, - "loss": 0.1797, - "step": 1629 - }, - { - "epoch": 4.31, - "learning_rate": 2.8820375335120648e-05, - "loss": 0.2569, - "step": 1630 - }, - { - "epoch": 4.31, - "learning_rate": 2.880697050938338e-05, - "loss": 0.0024, - "step": 1631 - }, - { - "epoch": 4.32, - "learning_rate": 2.8793565683646112e-05, - "loss": 0.1727, - "step": 1632 - }, - { - "epoch": 4.32, - "learning_rate": 2.878016085790885e-05, - "loss": 0.0091, - "step": 1633 - }, - { - "epoch": 4.32, - "learning_rate": 2.8766756032171582e-05, - "loss": 0.2002, - "step": 1634 - }, - { - "epoch": 4.33, - "learning_rate": 2.875335120643432e-05, - "loss": 0.0217, - "step": 1635 - }, - { - "epoch": 4.33, - "learning_rate": 2.8739946380697052e-05, - "loss": 0.2163, - "step": 1636 - }, - { - "epoch": 4.33, - "learning_rate": 2.8726541554959784e-05, - "loss": 0.0065, - "step": 1637 - }, - { - "epoch": 4.33, - "learning_rate": 2.8713136729222522e-05, - "loss": 0.1567, - "step": 1638 - }, - { - "epoch": 4.34, - "learning_rate": 2.8699731903485254e-05, - "loss": 0.1775, - "step": 1639 - }, - { - "epoch": 4.34, - "learning_rate": 2.8686327077747993e-05, - "loss": 0.0116, - "step": 1640 - }, - { - "epoch": 4.34, - "learning_rate": 2.8672922252010724e-05, - "loss": 0.0114, - "step": 1641 - }, - { - "epoch": 4.34, - "learning_rate": 2.8659517426273456e-05, - "loss": 0.0264, - "step": 1642 - }, - { - "epoch": 4.35, - "learning_rate": 2.8646112600536195e-05, - "loss": 0.0172, - "step": 1643 - }, - { - "epoch": 4.35, - "learning_rate": 2.8632707774798926e-05, - "loss": 0.187, - "step": 1644 - }, - { - "epoch": 4.35, - "learning_rate": 2.8619302949061665e-05, - "loss": 0.009, - "step": 1645 - }, - { - "epoch": 4.35, - "learning_rate": 2.8605898123324397e-05, - "loss": 0.014, - "step": 1646 - }, - { - "epoch": 4.36, - "learning_rate": 2.8592493297587132e-05, - "loss": 0.1643, - "step": 1647 - }, - { - "epoch": 4.36, - "learning_rate": 2.8579088471849867e-05, - "loss": 0.2763, - "step": 1648 - }, - { - "epoch": 4.36, - "learning_rate": 2.8565683646112602e-05, - "loss": 0.0641, - "step": 1649 - }, - { - "epoch": 4.37, - "learning_rate": 2.8552278820375337e-05, - "loss": 0.6128, - "step": 1650 - }, - { - "epoch": 4.37, - "learning_rate": 2.8538873994638072e-05, - "loss": 0.0229, - "step": 1651 - }, - { - "epoch": 4.37, - "learning_rate": 2.8525469168900804e-05, - "loss": 0.0344, - "step": 1652 - }, - { - "epoch": 4.37, - "learning_rate": 2.8512064343163543e-05, - "loss": 0.018, - "step": 1653 - }, - { - "epoch": 4.38, - "learning_rate": 2.8498659517426274e-05, - "loss": 0.191, - "step": 1654 - }, - { - "epoch": 4.38, - "learning_rate": 2.8485254691689013e-05, - "loss": 0.0397, - "step": 1655 - }, - { - "epoch": 4.38, - "learning_rate": 2.8471849865951745e-05, - "loss": 0.0029, - "step": 1656 - }, - { - "epoch": 4.38, - "learning_rate": 2.8458445040214476e-05, - "loss": 0.0034, - "step": 1657 - }, - { - "epoch": 4.39, - "learning_rate": 2.8445040214477215e-05, - "loss": 0.0031, - "step": 1658 - }, - { - "epoch": 4.39, - "learning_rate": 2.8431635388739947e-05, - "loss": 0.4272, - "step": 1659 - }, - { - "epoch": 4.39, - "learning_rate": 2.8418230563002685e-05, - "loss": 0.0042, - "step": 1660 - }, - { - "epoch": 4.39, - "learning_rate": 2.8404825737265417e-05, - "loss": 0.0224, - "step": 1661 - }, - { - "epoch": 4.4, - "learning_rate": 2.839142091152815e-05, - "loss": 0.1021, - "step": 1662 - }, - { - "epoch": 4.4, - "learning_rate": 2.8378016085790887e-05, - "loss": 0.0076, - "step": 1663 - }, - { - "epoch": 4.4, - "learning_rate": 2.836461126005362e-05, - "loss": 0.084, - "step": 1664 - }, - { - "epoch": 4.4, - "learning_rate": 2.8351206434316357e-05, - "loss": 0.0321, - "step": 1665 - }, - { - "epoch": 4.41, - "learning_rate": 2.833780160857909e-05, - "loss": 0.1369, - "step": 1666 - }, - { - "epoch": 4.41, - "learning_rate": 2.832439678284182e-05, - "loss": 0.018, - "step": 1667 - }, - { - "epoch": 4.41, - "learning_rate": 2.831099195710456e-05, - "loss": 0.1886, - "step": 1668 - }, - { - "epoch": 4.42, - "learning_rate": 2.829758713136729e-05, - "loss": 0.0016, - "step": 1669 - }, - { - "epoch": 4.42, - "learning_rate": 2.828418230563003e-05, - "loss": 0.0031, - "step": 1670 - }, - { - "epoch": 4.42, - "learning_rate": 2.827077747989276e-05, - "loss": 0.0043, - "step": 1671 - }, - { - "epoch": 4.42, - "learning_rate": 2.8257372654155497e-05, - "loss": 0.1202, - "step": 1672 - }, - { - "epoch": 4.43, - "learning_rate": 2.8243967828418232e-05, - "loss": 0.1409, - "step": 1673 - }, - { - "epoch": 4.43, - "learning_rate": 2.8230563002680967e-05, - "loss": 0.0821, - "step": 1674 - }, - { - "epoch": 4.43, - "learning_rate": 2.8217158176943702e-05, - "loss": 0.0468, - "step": 1675 - }, - { - "epoch": 4.43, - "learning_rate": 2.8203753351206437e-05, - "loss": 0.0559, - "step": 1676 - }, - { - "epoch": 4.44, - "learning_rate": 2.819034852546917e-05, - "loss": 0.0192, - "step": 1677 - }, - { - "epoch": 4.44, - "learning_rate": 2.8176943699731907e-05, - "loss": 0.0024, - "step": 1678 - }, - { - "epoch": 4.44, - "learning_rate": 2.816353887399464e-05, - "loss": 0.0021, - "step": 1679 - }, - { - "epoch": 4.44, - "learning_rate": 2.8150134048257378e-05, - "loss": 0.0139, - "step": 1680 - }, - { - "epoch": 4.45, - "learning_rate": 2.813672922252011e-05, - "loss": 0.0042, - "step": 1681 - }, - { - "epoch": 4.45, - "learning_rate": 2.812332439678284e-05, - "loss": 0.1666, - "step": 1682 - }, - { - "epoch": 4.45, - "learning_rate": 2.810991957104558e-05, - "loss": 0.5925, - "step": 1683 - }, - { - "epoch": 4.46, - "learning_rate": 2.809651474530831e-05, - "loss": 0.1689, - "step": 1684 - }, - { - "epoch": 4.46, - "learning_rate": 2.808310991957105e-05, - "loss": 0.0053, - "step": 1685 - }, - { - "epoch": 4.46, - "learning_rate": 2.806970509383378e-05, - "loss": 0.0019, - "step": 1686 - }, - { - "epoch": 4.46, - "learning_rate": 2.8056300268096513e-05, - "loss": 0.0632, - "step": 1687 - }, - { - "epoch": 4.47, - "learning_rate": 2.8042895442359252e-05, - "loss": 0.0115, - "step": 1688 - }, - { - "epoch": 4.47, - "learning_rate": 2.8029490616621984e-05, - "loss": 0.002, - "step": 1689 - }, - { - "epoch": 4.47, - "learning_rate": 2.8016085790884722e-05, - "loss": 0.0021, - "step": 1690 - }, - { - "epoch": 4.47, - "learning_rate": 2.8002680965147454e-05, - "loss": 0.0079, - "step": 1691 - }, - { - "epoch": 4.48, - "learning_rate": 2.7989276139410186e-05, - "loss": 0.0016, - "step": 1692 - }, - { - "epoch": 4.48, - "learning_rate": 2.7975871313672924e-05, - "loss": 0.1824, - "step": 1693 - }, - { - "epoch": 4.48, - "learning_rate": 2.7962466487935656e-05, - "loss": 0.1025, - "step": 1694 - }, - { - "epoch": 4.48, - "learning_rate": 2.7949061662198394e-05, - "loss": 0.4274, - "step": 1695 - }, - { - "epoch": 4.49, - "learning_rate": 2.7935656836461126e-05, - "loss": 0.0834, - "step": 1696 - }, - { - "epoch": 4.49, - "learning_rate": 2.7922252010723858e-05, - "loss": 0.6412, - "step": 1697 - }, - { - "epoch": 4.49, - "learning_rate": 2.7908847184986596e-05, - "loss": 0.3051, - "step": 1698 - }, - { - "epoch": 4.49, - "learning_rate": 2.7895442359249328e-05, - "loss": 0.0909, - "step": 1699 - }, - { - "epoch": 4.5, - "learning_rate": 2.7882037533512067e-05, - "loss": 0.2655, - "step": 1700 - }, - { - "epoch": 4.5, - "learning_rate": 2.78686327077748e-05, - "loss": 0.305, - "step": 1701 - }, - { - "epoch": 4.5, - "learning_rate": 2.7855227882037534e-05, - "loss": 0.2733, - "step": 1702 - }, - { - "epoch": 4.51, - "learning_rate": 2.784182305630027e-05, - "loss": 0.0021, - "step": 1703 - }, - { - "epoch": 4.51, - "learning_rate": 2.7828418230563004e-05, - "loss": 0.0072, - "step": 1704 - }, - { - "epoch": 4.51, - "learning_rate": 2.781501340482574e-05, - "loss": 0.0027, - "step": 1705 - }, - { - "epoch": 4.51, - "learning_rate": 2.7801608579088474e-05, - "loss": 0.184, - "step": 1706 - }, - { - "epoch": 4.52, - "learning_rate": 2.7788203753351206e-05, - "loss": 0.0143, - "step": 1707 - }, - { - "epoch": 4.52, - "learning_rate": 2.7774798927613944e-05, - "loss": 0.0297, - "step": 1708 - }, - { - "epoch": 4.52, - "learning_rate": 2.7761394101876676e-05, - "loss": 0.0739, - "step": 1709 - }, - { - "epoch": 4.52, - "learning_rate": 2.7747989276139415e-05, - "loss": 0.0188, - "step": 1710 - }, - { - "epoch": 4.53, - "learning_rate": 2.7734584450402146e-05, - "loss": 0.2487, - "step": 1711 - }, - { - "epoch": 4.53, - "learning_rate": 2.7721179624664878e-05, - "loss": 0.0222, - "step": 1712 - }, - { - "epoch": 4.53, - "learning_rate": 2.7707774798927617e-05, - "loss": 0.0041, - "step": 1713 - }, - { - "epoch": 4.53, - "learning_rate": 2.769436997319035e-05, - "loss": 0.0164, - "step": 1714 - }, - { - "epoch": 4.54, - "learning_rate": 2.7680965147453087e-05, - "loss": 0.0985, - "step": 1715 - }, - { - "epoch": 4.54, - "learning_rate": 2.766756032171582e-05, - "loss": 0.0067, - "step": 1716 - }, - { - "epoch": 4.54, - "learning_rate": 2.765415549597855e-05, - "loss": 0.3304, - "step": 1717 - }, - { - "epoch": 4.54, - "learning_rate": 2.764075067024129e-05, - "loss": 0.006, - "step": 1718 - }, - { - "epoch": 4.55, - "learning_rate": 2.762734584450402e-05, - "loss": 0.0142, - "step": 1719 - }, - { - "epoch": 4.55, - "learning_rate": 2.761394101876676e-05, - "loss": 0.2205, - "step": 1720 - }, - { - "epoch": 4.55, - "learning_rate": 2.760053619302949e-05, - "loss": 0.298, - "step": 1721 - }, - { - "epoch": 4.56, - "learning_rate": 2.7587131367292223e-05, - "loss": 0.0041, - "step": 1722 - }, - { - "epoch": 4.56, - "learning_rate": 2.757372654155496e-05, - "loss": 0.0018, - "step": 1723 - }, - { - "epoch": 4.56, - "learning_rate": 2.7560321715817693e-05, - "loss": 0.0185, - "step": 1724 - }, - { - "epoch": 4.56, - "learning_rate": 2.754691689008043e-05, - "loss": 0.0042, - "step": 1725 - }, - { - "epoch": 4.57, - "learning_rate": 2.7533512064343163e-05, - "loss": 0.036, - "step": 1726 - }, - { - "epoch": 4.57, - "learning_rate": 2.7520107238605898e-05, - "loss": 0.2593, - "step": 1727 - }, - { - "epoch": 4.57, - "learning_rate": 2.7506702412868633e-05, - "loss": 0.0062, - "step": 1728 - }, - { - "epoch": 4.57, - "learning_rate": 2.749329758713137e-05, - "loss": 0.1759, - "step": 1729 - }, - { - "epoch": 4.58, - "learning_rate": 2.7479892761394104e-05, - "loss": 0.0202, - "step": 1730 - }, - { - "epoch": 4.58, - "learning_rate": 2.746648793565684e-05, - "loss": 0.2156, - "step": 1731 - }, - { - "epoch": 4.58, - "learning_rate": 2.7453083109919574e-05, - "loss": 0.4112, - "step": 1732 - }, - { - "epoch": 4.58, - "learning_rate": 2.743967828418231e-05, - "loss": 0.0037, - "step": 1733 - }, - { - "epoch": 4.59, - "learning_rate": 2.742627345844504e-05, - "loss": 0.0186, - "step": 1734 - }, - { - "epoch": 4.59, - "learning_rate": 2.741286863270778e-05, - "loss": 0.0117, - "step": 1735 - }, - { - "epoch": 4.59, - "learning_rate": 2.739946380697051e-05, - "loss": 0.0039, - "step": 1736 - }, - { - "epoch": 4.6, - "learning_rate": 2.738605898123325e-05, - "loss": 0.1185, - "step": 1737 - }, - { - "epoch": 4.6, - "learning_rate": 2.737265415549598e-05, - "loss": 0.0276, - "step": 1738 - }, - { - "epoch": 4.6, - "learning_rate": 2.7359249329758713e-05, - "loss": 0.0041, - "step": 1739 - }, - { - "epoch": 4.6, - "learning_rate": 2.734584450402145e-05, - "loss": 0.0133, - "step": 1740 - }, - { - "epoch": 4.61, - "learning_rate": 2.7332439678284183e-05, - "loss": 0.1042, - "step": 1741 - }, - { - "epoch": 4.61, - "learning_rate": 2.7319034852546922e-05, - "loss": 0.0023, - "step": 1742 - }, - { - "epoch": 4.61, - "learning_rate": 2.7305630026809654e-05, - "loss": 0.1586, - "step": 1743 - }, - { - "epoch": 4.61, - "learning_rate": 2.7292225201072385e-05, - "loss": 0.0258, - "step": 1744 - }, - { - "epoch": 4.62, - "learning_rate": 2.7278820375335124e-05, - "loss": 0.1119, - "step": 1745 - }, - { - "epoch": 4.62, - "learning_rate": 2.7265415549597856e-05, - "loss": 0.1115, - "step": 1746 - }, - { - "epoch": 4.62, - "learning_rate": 2.7252010723860594e-05, - "loss": 0.4607, - "step": 1747 - }, - { - "epoch": 4.62, - "learning_rate": 2.7238605898123326e-05, - "loss": 0.0296, - "step": 1748 - }, - { - "epoch": 4.63, - "learning_rate": 2.7225201072386058e-05, - "loss": 0.0277, - "step": 1749 - }, - { - "epoch": 4.63, - "learning_rate": 2.7211796246648796e-05, - "loss": 0.0777, - "step": 1750 - }, - { - "epoch": 4.63, - "learning_rate": 2.7198391420911528e-05, - "loss": 0.0031, - "step": 1751 - }, - { - "epoch": 4.63, - "learning_rate": 2.7184986595174266e-05, - "loss": 0.2238, - "step": 1752 - }, - { - "epoch": 4.64, - "learning_rate": 2.7171581769436998e-05, - "loss": 0.0409, - "step": 1753 - }, - { - "epoch": 4.64, - "learning_rate": 2.715817694369973e-05, - "loss": 0.0032, - "step": 1754 - }, - { - "epoch": 4.64, - "learning_rate": 2.714477211796247e-05, - "loss": 0.0113, - "step": 1755 - }, - { - "epoch": 4.65, - "learning_rate": 2.71313672922252e-05, - "loss": 0.0204, - "step": 1756 - }, - { - "epoch": 4.65, - "learning_rate": 2.711796246648794e-05, - "loss": 0.0022, - "step": 1757 - }, - { - "epoch": 4.65, - "learning_rate": 2.710455764075067e-05, - "loss": 0.0018, - "step": 1758 - }, - { - "epoch": 4.65, - "learning_rate": 2.7091152815013406e-05, - "loss": 0.263, - "step": 1759 - }, - { - "epoch": 4.66, - "learning_rate": 2.707774798927614e-05, - "loss": 0.0109, - "step": 1760 - }, - { - "epoch": 4.66, - "learning_rate": 2.7064343163538876e-05, - "loss": 0.0653, - "step": 1761 - }, - { - "epoch": 4.66, - "learning_rate": 2.705093833780161e-05, - "loss": 0.0116, - "step": 1762 - }, - { - "epoch": 4.66, - "learning_rate": 2.7037533512064346e-05, - "loss": 0.0063, - "step": 1763 - }, - { - "epoch": 4.67, - "learning_rate": 2.7024128686327078e-05, - "loss": 0.0034, - "step": 1764 - }, - { - "epoch": 4.67, - "learning_rate": 2.7010723860589816e-05, - "loss": 0.0395, - "step": 1765 - }, - { - "epoch": 4.67, - "learning_rate": 2.6997319034852548e-05, - "loss": 0.0014, - "step": 1766 - }, - { - "epoch": 4.67, - "learning_rate": 2.6983914209115287e-05, - "loss": 0.0057, - "step": 1767 - }, - { - "epoch": 4.68, - "learning_rate": 2.697050938337802e-05, - "loss": 0.0018, - "step": 1768 - }, - { - "epoch": 4.68, - "learning_rate": 2.695710455764075e-05, - "loss": 0.012, - "step": 1769 - }, - { - "epoch": 4.68, - "learning_rate": 2.694369973190349e-05, - "loss": 0.0017, - "step": 1770 - }, - { - "epoch": 4.69, - "learning_rate": 2.693029490616622e-05, - "loss": 0.0654, - "step": 1771 - }, - { - "epoch": 4.69, - "learning_rate": 2.691689008042896e-05, - "loss": 0.8002, - "step": 1772 - }, - { - "epoch": 4.69, - "learning_rate": 2.690348525469169e-05, - "loss": 0.0035, - "step": 1773 - }, - { - "epoch": 4.69, - "learning_rate": 2.6890080428954422e-05, - "loss": 0.0051, - "step": 1774 - }, - { - "epoch": 4.7, - "learning_rate": 2.687667560321716e-05, - "loss": 0.0031, - "step": 1775 - }, - { - "epoch": 4.7, - "learning_rate": 2.6863270777479893e-05, - "loss": 0.0142, - "step": 1776 - }, - { - "epoch": 4.7, - "learning_rate": 2.684986595174263e-05, - "loss": 0.0009, - "step": 1777 - }, - { - "epoch": 4.7, - "learning_rate": 2.6836461126005363e-05, - "loss": 0.0015, - "step": 1778 - }, - { - "epoch": 4.71, - "learning_rate": 2.6823056300268095e-05, - "loss": 0.3481, - "step": 1779 - }, - { - "epoch": 4.71, - "learning_rate": 2.6809651474530833e-05, - "loss": 0.3095, - "step": 1780 - }, - { - "epoch": 4.71, - "learning_rate": 2.6796246648793565e-05, - "loss": 0.2567, - "step": 1781 - }, - { - "epoch": 4.71, - "learning_rate": 2.6782841823056303e-05, - "loss": 0.0037, - "step": 1782 - }, - { - "epoch": 4.72, - "learning_rate": 2.6769436997319035e-05, - "loss": 0.001, - "step": 1783 - }, - { - "epoch": 4.72, - "learning_rate": 2.675603217158177e-05, - "loss": 0.0065, - "step": 1784 - }, - { - "epoch": 4.72, - "learning_rate": 2.6742627345844505e-05, - "loss": 0.0029, - "step": 1785 - }, - { - "epoch": 4.72, - "learning_rate": 2.672922252010724e-05, - "loss": 0.6096, - "step": 1786 - }, - { - "epoch": 4.73, - "learning_rate": 2.6715817694369976e-05, - "loss": 0.0127, - "step": 1787 - }, - { - "epoch": 4.73, - "learning_rate": 2.670241286863271e-05, - "loss": 0.0031, - "step": 1788 - }, - { - "epoch": 4.73, - "learning_rate": 2.6689008042895443e-05, - "loss": 0.2463, - "step": 1789 - }, - { - "epoch": 4.74, - "learning_rate": 2.667560321715818e-05, - "loss": 0.1022, - "step": 1790 - }, - { - "epoch": 4.74, - "learning_rate": 2.6662198391420913e-05, - "loss": 0.002, - "step": 1791 - }, - { - "epoch": 4.74, - "learning_rate": 2.664879356568365e-05, - "loss": 0.1576, - "step": 1792 - }, - { - "epoch": 4.74, - "learning_rate": 2.6635388739946383e-05, - "loss": 0.1099, - "step": 1793 - }, - { - "epoch": 4.75, - "learning_rate": 2.6621983914209115e-05, - "loss": 0.1482, - "step": 1794 - }, - { - "epoch": 4.75, - "learning_rate": 2.6608579088471853e-05, - "loss": 0.0007, - "step": 1795 - }, - { - "epoch": 4.75, - "learning_rate": 2.6595174262734585e-05, - "loss": 0.0009, - "step": 1796 - }, - { - "epoch": 4.75, - "learning_rate": 2.6581769436997324e-05, - "loss": 0.005, - "step": 1797 - }, - { - "epoch": 4.76, - "learning_rate": 2.6568364611260055e-05, - "loss": 0.1808, - "step": 1798 - }, - { - "epoch": 4.76, - "learning_rate": 2.6554959785522787e-05, - "loss": 0.0351, - "step": 1799 - }, - { - "epoch": 4.76, - "learning_rate": 2.6541554959785526e-05, - "loss": 0.2555, - "step": 1800 - }, - { - "epoch": 4.76, - "learning_rate": 2.6528150134048257e-05, - "loss": 0.2236, - "step": 1801 - }, - { - "epoch": 4.77, - "learning_rate": 2.6514745308310996e-05, - "loss": 0.3208, - "step": 1802 - }, - { - "epoch": 4.77, - "learning_rate": 2.6501340482573728e-05, - "loss": 0.0202, - "step": 1803 - }, - { - "epoch": 4.77, - "learning_rate": 2.648793565683646e-05, - "loss": 0.0033, - "step": 1804 - }, - { - "epoch": 4.78, - "learning_rate": 2.6474530831099198e-05, - "loss": 0.001, - "step": 1805 - }, - { - "epoch": 4.78, - "learning_rate": 2.646112600536193e-05, - "loss": 0.0019, - "step": 1806 - }, - { - "epoch": 4.78, - "learning_rate": 2.6447721179624668e-05, - "loss": 0.0027, - "step": 1807 - }, - { - "epoch": 4.78, - "learning_rate": 2.64343163538874e-05, - "loss": 0.0051, - "step": 1808 - }, - { - "epoch": 4.79, - "learning_rate": 2.642091152815013e-05, - "loss": 0.1994, - "step": 1809 - }, - { - "epoch": 4.79, - "learning_rate": 2.640750670241287e-05, - "loss": 0.0372, - "step": 1810 - }, - { - "epoch": 4.79, - "learning_rate": 2.6394101876675602e-05, - "loss": 0.0678, - "step": 1811 - }, - { - "epoch": 4.79, - "learning_rate": 2.638069705093834e-05, - "loss": 0.0252, - "step": 1812 - }, - { - "epoch": 4.8, - "learning_rate": 2.6367292225201072e-05, - "loss": 0.0065, - "step": 1813 - }, - { - "epoch": 4.8, - "learning_rate": 2.6353887399463807e-05, - "loss": 0.0045, - "step": 1814 - }, - { - "epoch": 4.8, - "learning_rate": 2.6340482573726542e-05, - "loss": 0.0037, - "step": 1815 - }, - { - "epoch": 4.8, - "learning_rate": 2.6327077747989277e-05, - "loss": 0.0251, - "step": 1816 - }, - { - "epoch": 4.81, - "learning_rate": 2.6313672922252013e-05, - "loss": 0.4196, - "step": 1817 - }, - { - "epoch": 4.81, - "learning_rate": 2.6300268096514748e-05, - "loss": 0.0071, - "step": 1818 - }, - { - "epoch": 4.81, - "learning_rate": 2.628686327077748e-05, - "loss": 0.0787, - "step": 1819 - }, - { - "epoch": 4.81, - "learning_rate": 2.6273458445040218e-05, - "loss": 0.0145, - "step": 1820 - }, - { - "epoch": 4.82, - "learning_rate": 2.626005361930295e-05, - "loss": 0.009, - "step": 1821 - }, - { - "epoch": 4.82, - "learning_rate": 2.6246648793565688e-05, - "loss": 0.0027, - "step": 1822 - }, - { - "epoch": 4.82, - "learning_rate": 2.623324396782842e-05, - "loss": 0.0017, - "step": 1823 - }, - { - "epoch": 4.83, - "learning_rate": 2.6219839142091152e-05, - "loss": 0.4824, - "step": 1824 - }, - { - "epoch": 4.83, - "learning_rate": 2.620643431635389e-05, - "loss": 0.0022, - "step": 1825 - }, - { - "epoch": 4.83, - "learning_rate": 2.6193029490616622e-05, - "loss": 0.3223, - "step": 1826 - }, - { - "epoch": 4.83, - "learning_rate": 2.617962466487936e-05, - "loss": 0.2195, - "step": 1827 - }, - { - "epoch": 4.84, - "learning_rate": 2.6166219839142092e-05, - "loss": 0.0013, - "step": 1828 - }, - { - "epoch": 4.84, - "learning_rate": 2.6152815013404824e-05, - "loss": 0.0343, - "step": 1829 - }, - { - "epoch": 4.84, - "learning_rate": 2.6139410187667563e-05, - "loss": 0.0022, - "step": 1830 - }, - { - "epoch": 4.84, - "learning_rate": 2.6126005361930294e-05, - "loss": 0.0022, - "step": 1831 - }, - { - "epoch": 4.85, - "learning_rate": 2.6112600536193033e-05, - "loss": 0.4116, - "step": 1832 - }, - { - "epoch": 4.85, - "learning_rate": 2.6099195710455765e-05, - "loss": 0.0048, - "step": 1833 - }, - { - "epoch": 4.85, - "learning_rate": 2.6085790884718496e-05, - "loss": 0.5819, - "step": 1834 - }, - { - "epoch": 4.85, - "learning_rate": 2.6072386058981235e-05, - "loss": 0.1985, - "step": 1835 - }, - { - "epoch": 4.86, - "learning_rate": 2.6058981233243967e-05, - "loss": 0.0989, - "step": 1836 - }, - { - "epoch": 4.86, - "learning_rate": 2.6045576407506705e-05, - "loss": 0.341, - "step": 1837 - }, - { - "epoch": 4.86, - "learning_rate": 2.6032171581769437e-05, - "loss": 0.0044, - "step": 1838 - }, - { - "epoch": 4.87, - "learning_rate": 2.601876675603217e-05, - "loss": 0.004, - "step": 1839 - }, - { - "epoch": 4.87, - "learning_rate": 2.6005361930294907e-05, - "loss": 0.2858, - "step": 1840 - }, - { - "epoch": 4.87, - "learning_rate": 2.599195710455764e-05, - "loss": 0.0009, - "step": 1841 - }, - { - "epoch": 4.87, - "learning_rate": 2.5978552278820377e-05, - "loss": 0.0042, - "step": 1842 - }, - { - "epoch": 4.88, - "learning_rate": 2.596514745308311e-05, - "loss": 0.0045, - "step": 1843 - }, - { - "epoch": 4.88, - "learning_rate": 2.5951742627345844e-05, - "loss": 0.0144, - "step": 1844 - }, - { - "epoch": 4.88, - "learning_rate": 2.593833780160858e-05, - "loss": 0.0084, - "step": 1845 - }, - { - "epoch": 4.88, - "learning_rate": 2.5924932975871314e-05, - "loss": 0.4276, - "step": 1846 - }, - { - "epoch": 4.89, - "learning_rate": 2.591152815013405e-05, - "loss": 0.0122, - "step": 1847 - }, - { - "epoch": 4.89, - "learning_rate": 2.5898123324396785e-05, - "loss": 0.0776, - "step": 1848 - }, - { - "epoch": 4.89, - "learning_rate": 2.5884718498659516e-05, - "loss": 0.0117, - "step": 1849 - }, - { - "epoch": 4.89, - "learning_rate": 2.5871313672922255e-05, - "loss": 0.2809, - "step": 1850 - }, - { - "epoch": 4.9, - "learning_rate": 2.5857908847184987e-05, - "loss": 0.0413, - "step": 1851 - }, - { - "epoch": 4.9, - "learning_rate": 2.5844504021447725e-05, - "loss": 0.0187, - "step": 1852 - }, - { - "epoch": 4.9, - "learning_rate": 2.5831099195710457e-05, - "loss": 0.452, - "step": 1853 - }, - { - "epoch": 4.9, - "learning_rate": 2.5817694369973195e-05, - "loss": 0.0206, - "step": 1854 - }, - { - "epoch": 4.91, - "learning_rate": 2.5804289544235927e-05, - "loss": 0.1639, - "step": 1855 - }, - { - "epoch": 4.91, - "learning_rate": 2.579088471849866e-05, - "loss": 0.1865, - "step": 1856 - }, - { - "epoch": 4.91, - "learning_rate": 2.5777479892761398e-05, - "loss": 0.0022, - "step": 1857 - }, - { - "epoch": 4.92, - "learning_rate": 2.576407506702413e-05, - "loss": 0.1167, - "step": 1858 - }, - { - "epoch": 4.92, - "learning_rate": 2.5750670241286868e-05, - "loss": 0.4013, - "step": 1859 - }, - { - "epoch": 4.92, - "learning_rate": 2.57372654155496e-05, - "loss": 0.2355, - "step": 1860 - }, - { - "epoch": 4.92, - "learning_rate": 2.572386058981233e-05, - "loss": 0.0076, - "step": 1861 - }, - { - "epoch": 4.93, - "learning_rate": 2.571045576407507e-05, - "loss": 0.1612, - "step": 1862 - }, - { - "epoch": 4.93, - "learning_rate": 2.56970509383378e-05, - "loss": 0.0047, - "step": 1863 - }, - { - "epoch": 4.93, - "learning_rate": 2.568364611260054e-05, - "loss": 0.1511, - "step": 1864 - }, - { - "epoch": 4.93, - "learning_rate": 2.5670241286863272e-05, - "loss": 0.011, - "step": 1865 - }, - { - "epoch": 4.94, - "learning_rate": 2.5656836461126004e-05, - "loss": 0.1761, - "step": 1866 - }, - { - "epoch": 4.94, - "learning_rate": 2.5643431635388742e-05, - "loss": 0.004, - "step": 1867 - }, - { - "epoch": 4.94, - "learning_rate": 2.5630026809651474e-05, - "loss": 0.0036, - "step": 1868 - }, - { - "epoch": 4.94, - "learning_rate": 2.5616621983914212e-05, - "loss": 0.4345, - "step": 1869 - }, - { - "epoch": 4.95, - "learning_rate": 2.5603217158176944e-05, - "loss": 0.0034, - "step": 1870 - }, - { - "epoch": 4.95, - "learning_rate": 2.558981233243968e-05, - "loss": 0.1269, - "step": 1871 - }, - { - "epoch": 4.95, - "learning_rate": 2.5576407506702414e-05, - "loss": 0.183, - "step": 1872 - }, - { - "epoch": 4.96, - "learning_rate": 2.556300268096515e-05, - "loss": 0.008, - "step": 1873 - }, - { - "epoch": 4.96, - "learning_rate": 2.5549597855227885e-05, - "loss": 0.0035, - "step": 1874 - }, - { - "epoch": 4.96, - "learning_rate": 2.553619302949062e-05, - "loss": 0.0133, - "step": 1875 - }, - { - "epoch": 4.96, - "learning_rate": 2.552278820375335e-05, - "loss": 0.2156, - "step": 1876 - }, - { - "epoch": 4.97, - "learning_rate": 2.550938337801609e-05, - "loss": 0.0043, - "step": 1877 - }, - { - "epoch": 4.97, - "learning_rate": 2.549597855227882e-05, - "loss": 0.2614, - "step": 1878 - }, - { - "epoch": 4.97, - "learning_rate": 2.548257372654156e-05, - "loss": 0.0208, - "step": 1879 - }, - { - "epoch": 4.97, - "learning_rate": 2.5469168900804292e-05, - "loss": 0.0228, - "step": 1880 - }, - { - "epoch": 4.98, - "learning_rate": 2.5455764075067024e-05, - "loss": 0.0105, - "step": 1881 - }, - { - "epoch": 4.98, - "learning_rate": 2.5442359249329762e-05, - "loss": 0.0108, - "step": 1882 - }, - { - "epoch": 4.98, - "learning_rate": 2.5428954423592494e-05, - "loss": 0.3828, - "step": 1883 - }, - { - "epoch": 4.98, - "learning_rate": 2.5415549597855232e-05, - "loss": 0.0093, - "step": 1884 - }, - { - "epoch": 4.99, - "learning_rate": 2.5402144772117964e-05, - "loss": 0.0231, - "step": 1885 - }, - { - "epoch": 4.99, - "learning_rate": 2.5388739946380696e-05, - "loss": 0.0082, - "step": 1886 - }, - { - "epoch": 4.99, - "learning_rate": 2.5375335120643434e-05, - "loss": 0.1796, - "step": 1887 - }, - { - "epoch": 4.99, - "learning_rate": 2.5361930294906166e-05, - "loss": 0.0753, - "step": 1888 - }, - { - "epoch": 5.0, - "learning_rate": 2.5348525469168905e-05, - "loss": 0.0142, - "step": 1889 - }, - { - "epoch": 5.0, - "learning_rate": 2.5335120643431636e-05, - "loss": 0.0047, - "step": 1890 - }, - { - "epoch": 5.0, - "eval_f1": 0.7775974025974025, - "eval_loss": 0.953689694404602, - "eval_runtime": 1.8696, - "eval_samples_per_second": 809.285, - "eval_steps_per_second": 50.814, - "step": 1890 - }, - { - "epoch": 5.0, - "learning_rate": 2.5321715817694368e-05, - "loss": 0.0014, - "step": 1891 - }, - { - "epoch": 5.01, - "learning_rate": 2.5308310991957107e-05, - "loss": 0.0487, - "step": 1892 - }, - { - "epoch": 5.01, - "learning_rate": 2.529490616621984e-05, - "loss": 0.0037, - "step": 1893 - }, - { - "epoch": 5.01, - "learning_rate": 2.5281501340482577e-05, - "loss": 0.0512, - "step": 1894 - }, - { - "epoch": 5.01, - "learning_rate": 2.526809651474531e-05, - "loss": 0.134, - "step": 1895 - }, - { - "epoch": 5.02, - "learning_rate": 2.525469168900804e-05, - "loss": 0.3762, - "step": 1896 - }, - { - "epoch": 5.02, - "learning_rate": 2.524128686327078e-05, - "loss": 0.0011, - "step": 1897 - }, - { - "epoch": 5.02, - "learning_rate": 2.522788203753351e-05, - "loss": 0.0023, - "step": 1898 - }, - { - "epoch": 5.02, - "learning_rate": 2.521447721179625e-05, - "loss": 0.0526, - "step": 1899 - }, - { - "epoch": 5.03, - "learning_rate": 2.520107238605898e-05, - "loss": 0.0553, - "step": 1900 - }, - { - "epoch": 5.03, - "learning_rate": 2.5187667560321716e-05, - "loss": 0.1773, - "step": 1901 - }, - { - "epoch": 5.03, - "learning_rate": 2.517426273458445e-05, - "loss": 0.451, - "step": 1902 - }, - { - "epoch": 5.03, - "learning_rate": 2.5160857908847186e-05, - "loss": 0.0217, - "step": 1903 - }, - { - "epoch": 5.04, - "learning_rate": 2.514745308310992e-05, - "loss": 0.0728, - "step": 1904 - }, - { - "epoch": 5.04, - "learning_rate": 2.5134048257372657e-05, - "loss": 0.0009, - "step": 1905 - }, - { - "epoch": 5.04, - "learning_rate": 2.512064343163539e-05, - "loss": 0.1018, - "step": 1906 - }, - { - "epoch": 5.04, - "learning_rate": 2.5107238605898127e-05, - "loss": 0.0012, - "step": 1907 - }, - { - "epoch": 5.05, - "learning_rate": 2.509383378016086e-05, - "loss": 0.004, - "step": 1908 - }, - { - "epoch": 5.05, - "learning_rate": 2.5080428954423597e-05, - "loss": 0.0012, - "step": 1909 - }, - { - "epoch": 5.05, - "learning_rate": 2.506702412868633e-05, - "loss": 0.0128, - "step": 1910 - }, - { - "epoch": 5.06, - "learning_rate": 2.505361930294906e-05, - "loss": 0.1116, - "step": 1911 - }, - { - "epoch": 5.06, - "learning_rate": 2.50402144772118e-05, - "loss": 0.0011, - "step": 1912 - }, - { - "epoch": 5.06, - "learning_rate": 2.502680965147453e-05, - "loss": 0.0011, - "step": 1913 - }, - { - "epoch": 5.06, - "learning_rate": 2.501340482573727e-05, - "loss": 0.0897, - "step": 1914 - }, - { - "epoch": 5.07, - "learning_rate": 2.5e-05, - "loss": 0.0014, - "step": 1915 - }, - { - "epoch": 5.07, - "learning_rate": 2.4986595174262736e-05, - "loss": 0.0918, - "step": 1916 - }, - { - "epoch": 5.07, - "learning_rate": 2.497319034852547e-05, - "loss": 0.0026, - "step": 1917 - }, - { - "epoch": 5.07, - "learning_rate": 2.4959785522788203e-05, - "loss": 0.0225, - "step": 1918 - }, - { - "epoch": 5.08, - "learning_rate": 2.494638069705094e-05, - "loss": 0.2655, - "step": 1919 - }, - { - "epoch": 5.08, - "learning_rate": 2.4932975871313673e-05, - "loss": 0.0029, - "step": 1920 - }, - { - "epoch": 5.08, - "learning_rate": 2.491957104557641e-05, - "loss": 0.0006, - "step": 1921 - }, - { - "epoch": 5.08, - "learning_rate": 2.4906166219839144e-05, - "loss": 0.0008, - "step": 1922 - }, - { - "epoch": 5.09, - "learning_rate": 2.4892761394101875e-05, - "loss": 0.0012, - "step": 1923 - }, - { - "epoch": 5.09, - "learning_rate": 2.487935656836461e-05, - "loss": 0.0013, - "step": 1924 - }, - { - "epoch": 5.09, - "learning_rate": 2.4865951742627346e-05, - "loss": 0.0524, - "step": 1925 - }, - { - "epoch": 5.1, - "learning_rate": 2.485254691689008e-05, - "loss": 0.0059, - "step": 1926 - }, - { - "epoch": 5.1, - "learning_rate": 2.4839142091152816e-05, - "loss": 0.0026, - "step": 1927 - }, - { - "epoch": 5.1, - "learning_rate": 2.482573726541555e-05, - "loss": 0.0015, - "step": 1928 - }, - { - "epoch": 5.1, - "learning_rate": 2.4812332439678286e-05, - "loss": 0.0073, - "step": 1929 - }, - { - "epoch": 5.11, - "learning_rate": 2.479892761394102e-05, - "loss": 0.0008, - "step": 1930 - }, - { - "epoch": 5.11, - "learning_rate": 2.4785522788203757e-05, - "loss": 0.1519, - "step": 1931 - }, - { - "epoch": 5.11, - "learning_rate": 2.477211796246649e-05, - "loss": 0.008, - "step": 1932 - }, - { - "epoch": 5.11, - "learning_rate": 2.4758713136729223e-05, - "loss": 0.0009, - "step": 1933 - }, - { - "epoch": 5.12, - "learning_rate": 2.474530831099196e-05, - "loss": 0.0299, - "step": 1934 - }, - { - "epoch": 5.12, - "learning_rate": 2.4731903485254694e-05, - "loss": 0.1637, - "step": 1935 - }, - { - "epoch": 5.12, - "learning_rate": 2.471849865951743e-05, - "loss": 0.0369, - "step": 1936 - }, - { - "epoch": 5.12, - "learning_rate": 2.4705093833780164e-05, - "loss": 0.0057, - "step": 1937 - }, - { - "epoch": 5.13, - "learning_rate": 2.4691689008042896e-05, - "loss": 0.0035, - "step": 1938 - }, - { - "epoch": 5.13, - "learning_rate": 2.467828418230563e-05, - "loss": 0.0011, - "step": 1939 - }, - { - "epoch": 5.13, - "learning_rate": 2.4664879356568366e-05, - "loss": 0.0018, - "step": 1940 - }, - { - "epoch": 5.13, - "learning_rate": 2.46514745308311e-05, - "loss": 0.0013, - "step": 1941 - }, - { - "epoch": 5.14, - "learning_rate": 2.4638069705093836e-05, - "loss": 0.0779, - "step": 1942 - }, - { - "epoch": 5.14, - "learning_rate": 2.4624664879356568e-05, - "loss": 0.1762, - "step": 1943 - }, - { - "epoch": 5.14, - "learning_rate": 2.4611260053619303e-05, - "loss": 0.0006, - "step": 1944 - }, - { - "epoch": 5.15, - "learning_rate": 2.4597855227882038e-05, - "loss": 0.0037, - "step": 1945 - }, - { - "epoch": 5.15, - "learning_rate": 2.4584450402144773e-05, - "loss": 0.0005, - "step": 1946 - }, - { - "epoch": 5.15, - "learning_rate": 2.457104557640751e-05, - "loss": 0.0397, - "step": 1947 - }, - { - "epoch": 5.15, - "learning_rate": 2.4557640750670244e-05, - "loss": 0.0082, - "step": 1948 - }, - { - "epoch": 5.16, - "learning_rate": 2.4544235924932975e-05, - "loss": 0.0008, - "step": 1949 - }, - { - "epoch": 5.16, - "learning_rate": 2.453083109919571e-05, - "loss": 0.0219, - "step": 1950 - }, - { - "epoch": 5.16, - "learning_rate": 2.4517426273458446e-05, - "loss": 0.3966, - "step": 1951 - }, - { - "epoch": 5.16, - "learning_rate": 2.450402144772118e-05, - "loss": 0.0011, - "step": 1952 - }, - { - "epoch": 5.17, - "learning_rate": 2.4490616621983916e-05, - "loss": 0.3447, - "step": 1953 - }, - { - "epoch": 5.17, - "learning_rate": 2.4477211796246648e-05, - "loss": 0.0006, - "step": 1954 - }, - { - "epoch": 5.17, - "learning_rate": 2.4463806970509383e-05, - "loss": 0.0011, - "step": 1955 - }, - { - "epoch": 5.17, - "learning_rate": 2.4450402144772118e-05, - "loss": 0.0013, - "step": 1956 - }, - { - "epoch": 5.18, - "learning_rate": 2.4436997319034853e-05, - "loss": 0.1495, - "step": 1957 - }, - { - "epoch": 5.18, - "learning_rate": 2.4423592493297588e-05, - "loss": 0.0005, - "step": 1958 - }, - { - "epoch": 5.18, - "learning_rate": 2.4410187667560323e-05, - "loss": 0.3345, - "step": 1959 - }, - { - "epoch": 5.19, - "learning_rate": 2.439678284182306e-05, - "loss": 0.0048, - "step": 1960 - }, - { - "epoch": 5.19, - "learning_rate": 2.4383378016085793e-05, - "loss": 0.001, - "step": 1961 - }, - { - "epoch": 5.19, - "learning_rate": 2.436997319034853e-05, - "loss": 0.0025, - "step": 1962 - }, - { - "epoch": 5.19, - "learning_rate": 2.4356568364611264e-05, - "loss": 0.3215, - "step": 1963 - }, - { - "epoch": 5.2, - "learning_rate": 2.4343163538873995e-05, - "loss": 0.0197, - "step": 1964 - }, - { - "epoch": 5.2, - "learning_rate": 2.432975871313673e-05, - "loss": 0.0018, - "step": 1965 - }, - { - "epoch": 5.2, - "learning_rate": 2.4316353887399466e-05, - "loss": 0.1012, - "step": 1966 - }, - { - "epoch": 5.2, - "learning_rate": 2.43029490616622e-05, - "loss": 0.0179, - "step": 1967 - }, - { - "epoch": 5.21, - "learning_rate": 2.4289544235924936e-05, - "loss": 0.0032, - "step": 1968 - }, - { - "epoch": 5.21, - "learning_rate": 2.4276139410187668e-05, - "loss": 0.0011, - "step": 1969 - }, - { - "epoch": 5.21, - "learning_rate": 2.4262734584450403e-05, - "loss": 0.4875, - "step": 1970 - }, - { - "epoch": 5.21, - "learning_rate": 2.4249329758713138e-05, - "loss": 0.2791, - "step": 1971 - }, - { - "epoch": 5.22, - "learning_rate": 2.4235924932975873e-05, - "loss": 0.0011, - "step": 1972 - }, - { - "epoch": 5.22, - "learning_rate": 2.4222520107238608e-05, - "loss": 0.0011, - "step": 1973 - }, - { - "epoch": 5.22, - "learning_rate": 2.420911528150134e-05, - "loss": 0.0976, - "step": 1974 - }, - { - "epoch": 5.22, - "learning_rate": 2.4195710455764075e-05, - "loss": 0.3669, - "step": 1975 - }, - { - "epoch": 5.23, - "learning_rate": 2.418230563002681e-05, - "loss": 0.0022, - "step": 1976 - }, - { - "epoch": 5.23, - "learning_rate": 2.4168900804289545e-05, - "loss": 0.0015, - "step": 1977 - }, - { - "epoch": 5.23, - "learning_rate": 2.415549597855228e-05, - "loss": 0.0014, - "step": 1978 - }, - { - "epoch": 5.24, - "learning_rate": 2.4142091152815012e-05, - "loss": 0.0433, - "step": 1979 - }, - { - "epoch": 5.24, - "learning_rate": 2.4128686327077747e-05, - "loss": 0.0019, - "step": 1980 - }, - { - "epoch": 5.24, - "learning_rate": 2.4115281501340483e-05, - "loss": 0.0007, - "step": 1981 - }, - { - "epoch": 5.24, - "learning_rate": 2.4101876675603218e-05, - "loss": 0.0136, - "step": 1982 - }, - { - "epoch": 5.25, - "learning_rate": 2.4088471849865953e-05, - "loss": 0.1744, - "step": 1983 - }, - { - "epoch": 5.25, - "learning_rate": 2.4075067024128688e-05, - "loss": 0.1557, - "step": 1984 - }, - { - "epoch": 5.25, - "learning_rate": 2.4061662198391423e-05, - "loss": 0.1192, - "step": 1985 - }, - { - "epoch": 5.25, - "learning_rate": 2.4048257372654158e-05, - "loss": 0.0406, - "step": 1986 - }, - { - "epoch": 5.26, - "learning_rate": 2.4034852546916893e-05, - "loss": 0.2243, - "step": 1987 - }, - { - "epoch": 5.26, - "learning_rate": 2.402144772117963e-05, - "loss": 0.0021, - "step": 1988 - }, - { - "epoch": 5.26, - "learning_rate": 2.400804289544236e-05, - "loss": 0.002, - "step": 1989 - }, - { - "epoch": 5.26, - "learning_rate": 2.3994638069705095e-05, - "loss": 0.077, - "step": 1990 - }, - { - "epoch": 5.27, - "learning_rate": 2.398123324396783e-05, - "loss": 0.0378, - "step": 1991 - }, - { - "epoch": 5.27, - "learning_rate": 2.3967828418230566e-05, - "loss": 0.012, - "step": 1992 - }, - { - "epoch": 5.27, - "learning_rate": 2.39544235924933e-05, - "loss": 0.1386, - "step": 1993 - }, - { - "epoch": 5.28, - "learning_rate": 2.3941018766756032e-05, - "loss": 0.002, - "step": 1994 - }, - { - "epoch": 5.28, - "learning_rate": 2.3927613941018768e-05, - "loss": 0.0008, - "step": 1995 - }, - { - "epoch": 5.28, - "learning_rate": 2.3914209115281503e-05, - "loss": 0.0021, - "step": 1996 - }, - { - "epoch": 5.28, - "learning_rate": 2.3900804289544238e-05, - "loss": 0.022, - "step": 1997 - }, - { - "epoch": 5.29, - "learning_rate": 2.3887399463806973e-05, - "loss": 0.0015, - "step": 1998 - }, - { - "epoch": 5.29, - "learning_rate": 2.3873994638069705e-05, - "loss": 0.1486, - "step": 1999 - }, - { - "epoch": 5.29, - "learning_rate": 2.386058981233244e-05, - "loss": 0.2586, - "step": 2000 - }, - { - "epoch": 5.29, - "learning_rate": 2.3847184986595175e-05, - "loss": 0.0088, - "step": 2001 - }, - { - "epoch": 5.3, - "learning_rate": 2.383378016085791e-05, - "loss": 0.0044, - "step": 2002 - }, - { - "epoch": 5.3, - "learning_rate": 2.3820375335120645e-05, - "loss": 0.0015, - "step": 2003 - }, - { - "epoch": 5.3, - "learning_rate": 2.3806970509383377e-05, - "loss": 0.0008, - "step": 2004 - }, - { - "epoch": 5.3, - "learning_rate": 2.3793565683646112e-05, - "loss": 0.212, - "step": 2005 - }, - { - "epoch": 5.31, - "learning_rate": 2.3780160857908847e-05, - "loss": 0.0005, - "step": 2006 - }, - { - "epoch": 5.31, - "learning_rate": 2.3766756032171582e-05, - "loss": 0.1511, - "step": 2007 - }, - { - "epoch": 5.31, - "learning_rate": 2.3753351206434318e-05, - "loss": 0.0023, - "step": 2008 - }, - { - "epoch": 5.31, - "learning_rate": 2.3739946380697053e-05, - "loss": 0.1544, - "step": 2009 - }, - { - "epoch": 5.32, - "learning_rate": 2.3726541554959784e-05, - "loss": 0.0306, - "step": 2010 - }, - { - "epoch": 5.32, - "learning_rate": 2.371313672922252e-05, - "loss": 0.1005, - "step": 2011 - }, - { - "epoch": 5.32, - "learning_rate": 2.3699731903485255e-05, - "loss": 0.0744, - "step": 2012 - }, - { - "epoch": 5.33, - "learning_rate": 2.368632707774799e-05, - "loss": 0.0622, - "step": 2013 - }, - { - "epoch": 5.33, - "learning_rate": 2.3672922252010725e-05, - "loss": 0.0192, - "step": 2014 - }, - { - "epoch": 5.33, - "learning_rate": 2.365951742627346e-05, - "loss": 0.016, - "step": 2015 - }, - { - "epoch": 5.33, - "learning_rate": 2.3646112600536195e-05, - "loss": 0.293, - "step": 2016 - }, - { - "epoch": 5.34, - "learning_rate": 2.363270777479893e-05, - "loss": 0.0114, - "step": 2017 - }, - { - "epoch": 5.34, - "learning_rate": 2.3619302949061665e-05, - "loss": 0.1254, - "step": 2018 - }, - { - "epoch": 5.34, - "learning_rate": 2.36058981233244e-05, - "loss": 0.0638, - "step": 2019 - }, - { - "epoch": 5.34, - "learning_rate": 2.3592493297587132e-05, - "loss": 0.0192, - "step": 2020 - }, - { - "epoch": 5.35, - "learning_rate": 2.3579088471849867e-05, - "loss": 0.0069, - "step": 2021 - }, - { - "epoch": 5.35, - "learning_rate": 2.3565683646112603e-05, - "loss": 0.0573, - "step": 2022 - }, - { - "epoch": 5.35, - "learning_rate": 2.3552278820375338e-05, - "loss": 0.0039, - "step": 2023 - }, - { - "epoch": 5.35, - "learning_rate": 2.3538873994638073e-05, - "loss": 0.0079, - "step": 2024 - }, - { - "epoch": 5.36, - "learning_rate": 2.3525469168900805e-05, - "loss": 0.0063, - "step": 2025 - }, - { - "epoch": 5.36, - "learning_rate": 2.351206434316354e-05, - "loss": 0.0032, - "step": 2026 - }, - { - "epoch": 5.36, - "learning_rate": 2.3498659517426275e-05, - "loss": 0.1018, - "step": 2027 - }, - { - "epoch": 5.37, - "learning_rate": 2.348525469168901e-05, - "loss": 0.0017, - "step": 2028 - }, - { - "epoch": 5.37, - "learning_rate": 2.3471849865951745e-05, - "loss": 0.0203, - "step": 2029 - }, - { - "epoch": 5.37, - "learning_rate": 2.3458445040214477e-05, - "loss": 0.0008, - "step": 2030 - }, - { - "epoch": 5.37, - "learning_rate": 2.3445040214477212e-05, - "loss": 0.3145, - "step": 2031 - }, - { - "epoch": 5.38, - "learning_rate": 2.3431635388739947e-05, - "loss": 0.2476, - "step": 2032 - }, - { - "epoch": 5.38, - "learning_rate": 2.3418230563002682e-05, - "loss": 0.0573, - "step": 2033 - }, - { - "epoch": 5.38, - "learning_rate": 2.3404825737265417e-05, - "loss": 0.0008, - "step": 2034 - }, - { - "epoch": 5.38, - "learning_rate": 2.339142091152815e-05, - "loss": 0.0636, - "step": 2035 - }, - { - "epoch": 5.39, - "learning_rate": 2.3378016085790884e-05, - "loss": 0.001, - "step": 2036 - }, - { - "epoch": 5.39, - "learning_rate": 2.336461126005362e-05, - "loss": 0.0079, - "step": 2037 - }, - { - "epoch": 5.39, - "learning_rate": 2.3351206434316355e-05, - "loss": 0.0752, - "step": 2038 - }, - { - "epoch": 5.39, - "learning_rate": 2.333780160857909e-05, - "loss": 0.0024, - "step": 2039 - }, - { - "epoch": 5.4, - "learning_rate": 2.332439678284182e-05, - "loss": 0.0061, - "step": 2040 - }, - { - "epoch": 5.4, - "learning_rate": 2.3310991957104557e-05, - "loss": 0.2882, - "step": 2041 - }, - { - "epoch": 5.4, - "learning_rate": 2.329758713136729e-05, - "loss": 0.02, - "step": 2042 - }, - { - "epoch": 5.4, - "learning_rate": 2.3284182305630027e-05, - "loss": 0.018, - "step": 2043 - }, - { - "epoch": 5.41, - "learning_rate": 2.3270777479892762e-05, - "loss": 0.0125, - "step": 2044 - }, - { - "epoch": 5.41, - "learning_rate": 2.3257372654155497e-05, - "loss": 0.0007, - "step": 2045 - }, - { - "epoch": 5.41, - "learning_rate": 2.3243967828418232e-05, - "loss": 0.3849, - "step": 2046 - }, - { - "epoch": 5.42, - "learning_rate": 2.3230563002680967e-05, - "loss": 0.0011, - "step": 2047 - }, - { - "epoch": 5.42, - "learning_rate": 2.3217158176943702e-05, - "loss": 0.1235, - "step": 2048 - }, - { - "epoch": 5.42, - "learning_rate": 2.3203753351206438e-05, - "loss": 0.0006, - "step": 2049 - }, - { - "epoch": 5.42, - "learning_rate": 2.319034852546917e-05, - "loss": 0.0006, - "step": 2050 - }, - { - "epoch": 5.43, - "learning_rate": 2.3176943699731904e-05, - "loss": 0.0011, - "step": 2051 - }, - { - "epoch": 5.43, - "learning_rate": 2.316353887399464e-05, - "loss": 0.0011, - "step": 2052 - }, - { - "epoch": 5.43, - "learning_rate": 2.3150134048257375e-05, - "loss": 0.0032, - "step": 2053 - }, - { - "epoch": 5.43, - "learning_rate": 2.313672922252011e-05, - "loss": 0.0718, - "step": 2054 - }, - { - "epoch": 5.44, - "learning_rate": 2.312332439678284e-05, - "loss": 0.0052, - "step": 2055 - }, - { - "epoch": 5.44, - "learning_rate": 2.3109919571045577e-05, - "loss": 0.0026, - "step": 2056 - }, - { - "epoch": 5.44, - "learning_rate": 2.3096514745308312e-05, - "loss": 0.1854, - "step": 2057 - }, - { - "epoch": 5.44, - "learning_rate": 2.3083109919571047e-05, - "loss": 0.0008, - "step": 2058 - }, - { - "epoch": 5.45, - "learning_rate": 2.3069705093833782e-05, - "loss": 0.2671, - "step": 2059 - }, - { - "epoch": 5.45, - "learning_rate": 2.3056300268096514e-05, - "loss": 0.0064, - "step": 2060 - }, - { - "epoch": 5.45, - "learning_rate": 2.304289544235925e-05, - "loss": 0.0012, - "step": 2061 - }, - { - "epoch": 5.46, - "learning_rate": 2.3029490616621984e-05, - "loss": 0.0006, - "step": 2062 - }, - { - "epoch": 5.46, - "learning_rate": 2.301608579088472e-05, - "loss": 0.0008, - "step": 2063 - }, - { - "epoch": 5.46, - "learning_rate": 2.3002680965147454e-05, - "loss": 0.4998, - "step": 2064 - }, - { - "epoch": 5.46, - "learning_rate": 2.2989276139410186e-05, - "loss": 0.0037, - "step": 2065 - }, - { - "epoch": 5.47, - "learning_rate": 2.297587131367292e-05, - "loss": 0.0029, - "step": 2066 - }, - { - "epoch": 5.47, - "learning_rate": 2.2962466487935656e-05, - "loss": 0.043, - "step": 2067 - }, - { - "epoch": 5.47, - "learning_rate": 2.294906166219839e-05, - "loss": 0.0451, - "step": 2068 - }, - { - "epoch": 5.47, - "learning_rate": 2.2935656836461127e-05, - "loss": 0.0009, - "step": 2069 - }, - { - "epoch": 5.48, - "learning_rate": 2.2922252010723862e-05, - "loss": 0.002, - "step": 2070 - }, - { - "epoch": 5.48, - "learning_rate": 2.2908847184986597e-05, - "loss": 0.2744, - "step": 2071 - }, - { - "epoch": 5.48, - "learning_rate": 2.2895442359249332e-05, - "loss": 0.0146, - "step": 2072 - }, - { - "epoch": 5.48, - "learning_rate": 2.2882037533512067e-05, - "loss": 0.0011, - "step": 2073 - }, - { - "epoch": 5.49, - "learning_rate": 2.2868632707774802e-05, - "loss": 0.0421, - "step": 2074 - }, - { - "epoch": 5.49, - "learning_rate": 2.2855227882037537e-05, - "loss": 0.1518, - "step": 2075 - }, - { - "epoch": 5.49, - "learning_rate": 2.284182305630027e-05, - "loss": 0.0072, - "step": 2076 - }, - { - "epoch": 5.49, - "learning_rate": 2.2828418230563004e-05, - "loss": 0.2781, - "step": 2077 - }, - { - "epoch": 5.5, - "learning_rate": 2.281501340482574e-05, - "loss": 0.004, - "step": 2078 - }, - { - "epoch": 5.5, - "learning_rate": 2.2801608579088475e-05, - "loss": 0.1029, - "step": 2079 - }, - { - "epoch": 5.5, - "learning_rate": 2.278820375335121e-05, - "loss": 0.0526, - "step": 2080 - }, - { - "epoch": 5.51, - "learning_rate": 2.277479892761394e-05, - "loss": 0.0011, - "step": 2081 - }, - { - "epoch": 5.51, - "learning_rate": 2.2761394101876677e-05, - "loss": 0.0139, - "step": 2082 - }, - { - "epoch": 5.51, - "learning_rate": 2.274798927613941e-05, - "loss": 0.0509, - "step": 2083 - }, - { - "epoch": 5.51, - "learning_rate": 2.2734584450402147e-05, - "loss": 0.0042, - "step": 2084 - }, - { - "epoch": 5.52, - "learning_rate": 2.2721179624664882e-05, - "loss": 0.1964, - "step": 2085 - }, - { - "epoch": 5.52, - "learning_rate": 2.2707774798927614e-05, - "loss": 0.0083, - "step": 2086 - }, - { - "epoch": 5.52, - "learning_rate": 2.269436997319035e-05, - "loss": 0.101, - "step": 2087 - }, - { - "epoch": 5.52, - "learning_rate": 2.2680965147453084e-05, - "loss": 0.0094, - "step": 2088 - }, - { - "epoch": 5.53, - "learning_rate": 2.266756032171582e-05, - "loss": 0.1433, - "step": 2089 - }, - { - "epoch": 5.53, - "learning_rate": 2.2654155495978554e-05, - "loss": 0.0091, - "step": 2090 - }, - { - "epoch": 5.53, - "learning_rate": 2.2640750670241286e-05, - "loss": 0.0018, - "step": 2091 - }, - { - "epoch": 5.53, - "learning_rate": 2.262734584450402e-05, - "loss": 0.001, - "step": 2092 - }, - { - "epoch": 5.54, - "learning_rate": 2.2613941018766756e-05, - "loss": 0.3507, - "step": 2093 - }, - { - "epoch": 5.54, - "learning_rate": 2.260053619302949e-05, - "loss": 0.0527, - "step": 2094 - }, - { - "epoch": 5.54, - "learning_rate": 2.2587131367292226e-05, - "loss": 0.0015, - "step": 2095 - }, - { - "epoch": 5.54, - "learning_rate": 2.2573726541554958e-05, - "loss": 0.0195, - "step": 2096 - }, - { - "epoch": 5.55, - "learning_rate": 2.2560321715817693e-05, - "loss": 0.0007, - "step": 2097 - }, - { - "epoch": 5.55, - "learning_rate": 2.254691689008043e-05, - "loss": 0.001, - "step": 2098 - }, - { - "epoch": 5.55, - "learning_rate": 2.2533512064343164e-05, - "loss": 0.2777, - "step": 2099 - }, - { - "epoch": 5.56, - "learning_rate": 2.25201072386059e-05, - "loss": 0.0657, - "step": 2100 - }, - { - "epoch": 5.56, - "learning_rate": 2.2506702412868634e-05, - "loss": 0.159, - "step": 2101 - }, - { - "epoch": 5.56, - "learning_rate": 2.249329758713137e-05, - "loss": 0.0342, - "step": 2102 - }, - { - "epoch": 5.56, - "learning_rate": 2.2479892761394104e-05, - "loss": 0.3001, - "step": 2103 - }, - { - "epoch": 5.57, - "learning_rate": 2.246648793565684e-05, - "loss": 0.0028, - "step": 2104 - }, - { - "epoch": 5.57, - "learning_rate": 2.2453083109919574e-05, - "loss": 0.0191, - "step": 2105 - }, - { - "epoch": 5.57, - "learning_rate": 2.2439678284182306e-05, - "loss": 0.0012, - "step": 2106 - }, - { - "epoch": 5.57, - "learning_rate": 2.242627345844504e-05, - "loss": 0.2619, - "step": 2107 - }, - { - "epoch": 5.58, - "learning_rate": 2.2412868632707776e-05, - "loss": 0.001, - "step": 2108 - }, - { - "epoch": 5.58, - "learning_rate": 2.239946380697051e-05, - "loss": 0.094, - "step": 2109 - }, - { - "epoch": 5.58, - "learning_rate": 2.2386058981233247e-05, - "loss": 0.003, - "step": 2110 - }, - { - "epoch": 5.58, - "learning_rate": 2.237265415549598e-05, - "loss": 0.0528, - "step": 2111 - }, - { - "epoch": 5.59, - "learning_rate": 2.2359249329758714e-05, - "loss": 0.1252, - "step": 2112 - }, - { - "epoch": 5.59, - "learning_rate": 2.234584450402145e-05, - "loss": 0.0039, - "step": 2113 - }, - { - "epoch": 5.59, - "learning_rate": 2.2332439678284184e-05, - "loss": 0.0913, - "step": 2114 - }, - { - "epoch": 5.6, - "learning_rate": 2.231903485254692e-05, - "loss": 0.0023, - "step": 2115 - }, - { - "epoch": 5.6, - "learning_rate": 2.230563002680965e-05, - "loss": 0.0047, - "step": 2116 - }, - { - "epoch": 5.6, - "learning_rate": 2.2292225201072386e-05, - "loss": 0.0688, - "step": 2117 - }, - { - "epoch": 5.6, - "learning_rate": 2.227882037533512e-05, - "loss": 0.0013, - "step": 2118 - }, - { - "epoch": 5.61, - "learning_rate": 2.2265415549597856e-05, - "loss": 0.0012, - "step": 2119 - }, - { - "epoch": 5.61, - "learning_rate": 2.225201072386059e-05, - "loss": 0.0048, - "step": 2120 - }, - { - "epoch": 5.61, - "learning_rate": 2.2238605898123323e-05, - "loss": 0.0011, - "step": 2121 - }, - { - "epoch": 5.61, - "learning_rate": 2.2225201072386058e-05, - "loss": 0.0009, - "step": 2122 - }, - { - "epoch": 5.62, - "learning_rate": 2.2211796246648793e-05, - "loss": 0.0015, - "step": 2123 - }, - { - "epoch": 5.62, - "learning_rate": 2.2198391420911528e-05, - "loss": 0.0077, - "step": 2124 - }, - { - "epoch": 5.62, - "learning_rate": 2.2184986595174263e-05, - "loss": 0.0007, - "step": 2125 - }, - { - "epoch": 5.62, - "learning_rate": 2.2171581769437e-05, - "loss": 0.0007, - "step": 2126 - }, - { - "epoch": 5.63, - "learning_rate": 2.2158176943699734e-05, - "loss": 0.0506, - "step": 2127 - }, - { - "epoch": 5.63, - "learning_rate": 2.214477211796247e-05, - "loss": 0.0016, - "step": 2128 - }, - { - "epoch": 5.63, - "learning_rate": 2.2131367292225204e-05, - "loss": 0.0005, - "step": 2129 - }, - { - "epoch": 5.63, - "learning_rate": 2.211796246648794e-05, - "loss": 0.0049, - "step": 2130 - }, - { - "epoch": 5.64, - "learning_rate": 2.210455764075067e-05, - "loss": 0.0305, - "step": 2131 - }, - { - "epoch": 5.64, - "learning_rate": 2.2091152815013406e-05, - "loss": 0.0448, - "step": 2132 - }, - { - "epoch": 5.64, - "learning_rate": 2.207774798927614e-05, - "loss": 0.5391, - "step": 2133 - }, - { - "epoch": 5.65, - "learning_rate": 2.2064343163538876e-05, - "loss": 0.0005, - "step": 2134 - }, - { - "epoch": 5.65, - "learning_rate": 2.205093833780161e-05, - "loss": 0.0141, - "step": 2135 - }, - { - "epoch": 5.65, - "learning_rate": 2.2037533512064346e-05, - "loss": 0.3613, - "step": 2136 - }, - { - "epoch": 5.65, - "learning_rate": 2.2024128686327078e-05, - "loss": 0.0013, - "step": 2137 - }, - { - "epoch": 5.66, - "learning_rate": 2.2010723860589813e-05, - "loss": 0.002, - "step": 2138 - }, - { - "epoch": 5.66, - "learning_rate": 2.199731903485255e-05, - "loss": 0.0207, - "step": 2139 - }, - { - "epoch": 5.66, - "learning_rate": 2.1983914209115284e-05, - "loss": 0.0004, - "step": 2140 - }, - { - "epoch": 5.66, - "learning_rate": 2.197050938337802e-05, - "loss": 0.0022, - "step": 2141 - }, - { - "epoch": 5.67, - "learning_rate": 2.195710455764075e-05, - "loss": 0.5076, - "step": 2142 - }, - { - "epoch": 5.67, - "learning_rate": 2.1943699731903486e-05, - "loss": 0.0016, - "step": 2143 - }, - { - "epoch": 5.67, - "learning_rate": 2.193029490616622e-05, - "loss": 0.0014, - "step": 2144 - }, - { - "epoch": 5.67, - "learning_rate": 2.1916890080428956e-05, - "loss": 0.0101, - "step": 2145 - }, - { - "epoch": 5.68, - "learning_rate": 2.190348525469169e-05, - "loss": 0.0048, - "step": 2146 - }, - { - "epoch": 5.68, - "learning_rate": 2.1890080428954423e-05, - "loss": 0.001, - "step": 2147 - }, - { - "epoch": 5.68, - "learning_rate": 2.1876675603217158e-05, - "loss": 0.0004, - "step": 2148 - }, - { - "epoch": 5.69, - "learning_rate": 2.1863270777479893e-05, - "loss": 0.2627, - "step": 2149 - }, - { - "epoch": 5.69, - "learning_rate": 2.1849865951742628e-05, - "loss": 0.0013, - "step": 2150 - }, - { - "epoch": 5.69, - "learning_rate": 2.1836461126005363e-05, - "loss": 0.0074, - "step": 2151 - }, - { - "epoch": 5.69, - "learning_rate": 2.1823056300268095e-05, - "loss": 0.0238, - "step": 2152 - }, - { - "epoch": 5.7, - "learning_rate": 2.180965147453083e-05, - "loss": 0.0013, - "step": 2153 - }, - { - "epoch": 5.7, - "learning_rate": 2.1796246648793565e-05, - "loss": 0.0005, - "step": 2154 - }, - { - "epoch": 5.7, - "learning_rate": 2.17828418230563e-05, - "loss": 0.0006, - "step": 2155 - }, - { - "epoch": 5.7, - "learning_rate": 2.1769436997319036e-05, - "loss": 0.0442, - "step": 2156 - }, - { - "epoch": 5.71, - "learning_rate": 2.175603217158177e-05, - "loss": 0.0007, - "step": 2157 - }, - { - "epoch": 5.71, - "learning_rate": 2.1742627345844506e-05, - "loss": 0.0032, - "step": 2158 - }, - { - "epoch": 5.71, - "learning_rate": 2.172922252010724e-05, - "loss": 0.005, - "step": 2159 - }, - { - "epoch": 5.71, - "learning_rate": 2.1715817694369976e-05, - "loss": 0.0005, - "step": 2160 - }, - { - "epoch": 5.72, - "learning_rate": 2.170241286863271e-05, - "loss": 0.0005, - "step": 2161 - }, - { - "epoch": 5.72, - "learning_rate": 2.1689008042895443e-05, - "loss": 0.2023, - "step": 2162 - }, - { - "epoch": 5.72, - "learning_rate": 2.1675603217158178e-05, - "loss": 0.3146, - "step": 2163 - }, - { - "epoch": 5.72, - "learning_rate": 2.1662198391420913e-05, - "loss": 0.0035, - "step": 2164 - }, - { - "epoch": 5.73, - "learning_rate": 2.164879356568365e-05, - "loss": 0.141, - "step": 2165 - }, - { - "epoch": 5.73, - "learning_rate": 2.1635388739946383e-05, - "loss": 0.0005, - "step": 2166 - }, - { - "epoch": 5.73, - "learning_rate": 2.1621983914209115e-05, - "loss": 0.0013, - "step": 2167 - }, - { - "epoch": 5.74, - "learning_rate": 2.160857908847185e-05, - "loss": 0.0484, - "step": 2168 - }, - { - "epoch": 5.74, - "learning_rate": 2.1595174262734585e-05, - "loss": 0.0109, - "step": 2169 - }, - { - "epoch": 5.74, - "learning_rate": 2.158176943699732e-05, - "loss": 0.3307, - "step": 2170 - }, - { - "epoch": 5.74, - "learning_rate": 2.1568364611260056e-05, - "loss": 0.0013, - "step": 2171 - }, - { - "epoch": 5.75, - "learning_rate": 2.1554959785522787e-05, - "loss": 0.0005, - "step": 2172 - }, - { - "epoch": 5.75, - "learning_rate": 2.1541554959785523e-05, - "loss": 0.0004, - "step": 2173 - }, - { - "epoch": 5.75, - "learning_rate": 2.1528150134048258e-05, - "loss": 0.0744, - "step": 2174 - }, - { - "epoch": 5.75, - "learning_rate": 2.1514745308310993e-05, - "loss": 0.0008, - "step": 2175 - }, - { - "epoch": 5.76, - "learning_rate": 2.1501340482573728e-05, - "loss": 0.0013, - "step": 2176 - }, - { - "epoch": 5.76, - "learning_rate": 2.148793565683646e-05, - "loss": 0.0006, - "step": 2177 - }, - { - "epoch": 5.76, - "learning_rate": 2.1474530831099195e-05, - "loss": 0.055, - "step": 2178 - }, - { - "epoch": 5.76, - "learning_rate": 2.146112600536193e-05, - "loss": 0.0013, - "step": 2179 - }, - { - "epoch": 5.77, - "learning_rate": 2.1447721179624665e-05, - "loss": 0.0022, - "step": 2180 - }, - { - "epoch": 5.77, - "learning_rate": 2.14343163538874e-05, - "loss": 0.0009, - "step": 2181 - }, - { - "epoch": 5.77, - "learning_rate": 2.1420911528150135e-05, - "loss": 0.0011, - "step": 2182 - }, - { - "epoch": 5.78, - "learning_rate": 2.140750670241287e-05, - "loss": 0.0676, - "step": 2183 - }, - { - "epoch": 5.78, - "learning_rate": 2.1394101876675606e-05, - "loss": 0.4583, - "step": 2184 - }, - { - "epoch": 5.78, - "learning_rate": 2.138069705093834e-05, - "loss": 0.0009, - "step": 2185 - }, - { - "epoch": 5.78, - "learning_rate": 2.1367292225201076e-05, - "loss": 0.002, - "step": 2186 - }, - { - "epoch": 5.79, - "learning_rate": 2.1353887399463808e-05, - "loss": 0.7625, - "step": 2187 - }, - { - "epoch": 5.79, - "learning_rate": 2.1340482573726543e-05, - "loss": 0.4912, - "step": 2188 - }, - { - "epoch": 5.79, - "learning_rate": 2.1327077747989278e-05, - "loss": 0.0283, - "step": 2189 - }, - { - "epoch": 5.79, - "learning_rate": 2.1313672922252013e-05, - "loss": 0.0011, - "step": 2190 - }, - { - "epoch": 5.8, - "learning_rate": 2.1300268096514748e-05, - "loss": 0.0013, - "step": 2191 - }, - { - "epoch": 5.8, - "learning_rate": 2.128686327077748e-05, - "loss": 0.3384, - "step": 2192 - }, - { - "epoch": 5.8, - "learning_rate": 2.1273458445040215e-05, - "loss": 0.4533, - "step": 2193 - }, - { - "epoch": 5.8, - "learning_rate": 2.126005361930295e-05, - "loss": 0.0039, - "step": 2194 - }, - { - "epoch": 5.81, - "learning_rate": 2.1246648793565685e-05, - "loss": 0.0038, - "step": 2195 - }, - { - "epoch": 5.81, - "learning_rate": 2.123324396782842e-05, - "loss": 0.0318, - "step": 2196 - }, - { - "epoch": 5.81, - "learning_rate": 2.1219839142091156e-05, - "loss": 0.0045, - "step": 2197 - }, - { - "epoch": 5.81, - "learning_rate": 2.1206434316353887e-05, - "loss": 0.3134, - "step": 2198 - }, - { - "epoch": 5.82, - "learning_rate": 2.1193029490616622e-05, - "loss": 0.0011, - "step": 2199 - }, - { - "epoch": 5.82, - "learning_rate": 2.1179624664879358e-05, - "loss": 0.0008, - "step": 2200 - }, - { - "epoch": 5.82, - "learning_rate": 2.1166219839142093e-05, - "loss": 0.0009, - "step": 2201 - }, - { - "epoch": 5.83, - "learning_rate": 2.1152815013404828e-05, - "loss": 0.0083, - "step": 2202 - }, - { - "epoch": 5.83, - "learning_rate": 2.113941018766756e-05, - "loss": 0.0196, - "step": 2203 - }, - { - "epoch": 5.83, - "learning_rate": 2.1126005361930295e-05, - "loss": 0.0063, - "step": 2204 - }, - { - "epoch": 5.83, - "learning_rate": 2.111260053619303e-05, - "loss": 0.0064, - "step": 2205 - }, - { - "epoch": 5.84, - "learning_rate": 2.1099195710455765e-05, - "loss": 0.0143, - "step": 2206 - }, - { - "epoch": 5.84, - "learning_rate": 2.10857908847185e-05, - "loss": 0.0012, - "step": 2207 - }, - { - "epoch": 5.84, - "learning_rate": 2.1072386058981232e-05, - "loss": 0.0033, - "step": 2208 - }, - { - "epoch": 5.84, - "learning_rate": 2.1058981233243967e-05, - "loss": 0.0014, - "step": 2209 - }, - { - "epoch": 5.85, - "learning_rate": 2.1045576407506702e-05, - "loss": 0.0219, - "step": 2210 - }, - { - "epoch": 5.85, - "learning_rate": 2.1032171581769437e-05, - "loss": 0.3033, - "step": 2211 - }, - { - "epoch": 5.85, - "learning_rate": 2.1018766756032172e-05, - "loss": 0.0711, - "step": 2212 - }, - { - "epoch": 5.85, - "learning_rate": 2.1005361930294907e-05, - "loss": 0.0051, - "step": 2213 - }, - { - "epoch": 5.86, - "learning_rate": 2.0991957104557643e-05, - "loss": 0.026, - "step": 2214 - }, - { - "epoch": 5.86, - "learning_rate": 2.0978552278820378e-05, - "loss": 0.0024, - "step": 2215 - }, - { - "epoch": 5.86, - "learning_rate": 2.0965147453083113e-05, - "loss": 0.3622, - "step": 2216 - }, - { - "epoch": 5.87, - "learning_rate": 2.0951742627345848e-05, - "loss": 0.0009, - "step": 2217 - }, - { - "epoch": 5.87, - "learning_rate": 2.093833780160858e-05, - "loss": 0.0019, - "step": 2218 - }, - { - "epoch": 5.87, - "learning_rate": 2.0924932975871315e-05, - "loss": 0.3412, - "step": 2219 - }, - { - "epoch": 5.87, - "learning_rate": 2.091152815013405e-05, - "loss": 0.0197, - "step": 2220 - }, - { - "epoch": 5.88, - "learning_rate": 2.0898123324396785e-05, - "loss": 0.4229, - "step": 2221 - }, - { - "epoch": 5.88, - "learning_rate": 2.088471849865952e-05, - "loss": 0.0014, - "step": 2222 - }, - { - "epoch": 5.88, - "learning_rate": 2.0871313672922252e-05, - "loss": 0.0183, - "step": 2223 - }, - { - "epoch": 5.88, - "learning_rate": 2.0857908847184987e-05, - "loss": 0.2005, - "step": 2224 - }, - { - "epoch": 5.89, - "learning_rate": 2.0844504021447722e-05, - "loss": 0.0122, - "step": 2225 - }, - { - "epoch": 5.89, - "learning_rate": 2.0831099195710457e-05, - "loss": 0.1178, - "step": 2226 - }, - { - "epoch": 5.89, - "learning_rate": 2.0817694369973193e-05, - "loss": 0.0105, - "step": 2227 - }, - { - "epoch": 5.89, - "learning_rate": 2.0804289544235924e-05, - "loss": 0.0328, - "step": 2228 - }, - { - "epoch": 5.9, - "learning_rate": 2.079088471849866e-05, - "loss": 0.0087, - "step": 2229 - }, - { - "epoch": 5.9, - "learning_rate": 2.0777479892761395e-05, - "loss": 0.0288, - "step": 2230 - }, - { - "epoch": 5.9, - "learning_rate": 2.076407506702413e-05, - "loss": 0.0017, - "step": 2231 - }, - { - "epoch": 5.9, - "learning_rate": 2.0750670241286865e-05, - "loss": 0.002, - "step": 2232 - }, - { - "epoch": 5.91, - "learning_rate": 2.0737265415549597e-05, - "loss": 0.0024, - "step": 2233 - }, - { - "epoch": 5.91, - "learning_rate": 2.072386058981233e-05, - "loss": 0.0008, - "step": 2234 - }, - { - "epoch": 5.91, - "learning_rate": 2.0710455764075067e-05, - "loss": 0.002, - "step": 2235 - }, - { - "epoch": 5.92, - "learning_rate": 2.0697050938337802e-05, - "loss": 0.0052, - "step": 2236 - }, - { - "epoch": 5.92, - "learning_rate": 2.0683646112600537e-05, - "loss": 0.0058, - "step": 2237 - }, - { - "epoch": 5.92, - "learning_rate": 2.0670241286863272e-05, - "loss": 0.0013, - "step": 2238 - }, - { - "epoch": 5.92, - "learning_rate": 2.0656836461126007e-05, - "loss": 0.0158, - "step": 2239 - }, - { - "epoch": 5.93, - "learning_rate": 2.0643431635388742e-05, - "loss": 0.229, - "step": 2240 - }, - { - "epoch": 5.93, - "learning_rate": 2.0630026809651478e-05, - "loss": 0.1844, - "step": 2241 - }, - { - "epoch": 5.93, - "learning_rate": 2.0616621983914213e-05, - "loss": 0.2905, - "step": 2242 - }, - { - "epoch": 5.93, - "learning_rate": 2.0603217158176944e-05, - "loss": 0.0059, - "step": 2243 - }, - { - "epoch": 5.94, - "learning_rate": 2.058981233243968e-05, - "loss": 0.0007, - "step": 2244 - }, - { - "epoch": 5.94, - "learning_rate": 2.0576407506702415e-05, - "loss": 0.1638, - "step": 2245 - }, - { - "epoch": 5.94, - "learning_rate": 2.056300268096515e-05, - "loss": 0.1195, - "step": 2246 - }, - { - "epoch": 5.94, - "learning_rate": 2.0549597855227885e-05, - "loss": 0.0015, - "step": 2247 - }, - { - "epoch": 5.95, - "learning_rate": 2.0536193029490617e-05, - "loss": 0.0013, - "step": 2248 - }, - { - "epoch": 5.95, - "learning_rate": 2.0522788203753352e-05, - "loss": 0.5152, - "step": 2249 - }, - { - "epoch": 5.95, - "learning_rate": 2.0509383378016087e-05, - "loss": 0.0315, - "step": 2250 - }, - { - "epoch": 5.96, - "learning_rate": 2.0495978552278822e-05, - "loss": 0.1213, - "step": 2251 - }, - { - "epoch": 5.96, - "learning_rate": 2.0482573726541557e-05, - "loss": 0.0006, - "step": 2252 - }, - { - "epoch": 5.96, - "learning_rate": 2.046916890080429e-05, - "loss": 0.0011, - "step": 2253 - }, - { - "epoch": 5.96, - "learning_rate": 2.0455764075067024e-05, - "loss": 0.2546, - "step": 2254 - }, - { - "epoch": 5.97, - "learning_rate": 2.044235924932976e-05, - "loss": 0.1259, - "step": 2255 - }, - { - "epoch": 5.97, - "learning_rate": 2.0428954423592494e-05, - "loss": 0.0179, - "step": 2256 - }, - { - "epoch": 5.97, - "learning_rate": 2.041554959785523e-05, - "loss": 0.0257, - "step": 2257 - }, - { - "epoch": 5.97, - "learning_rate": 2.0402144772117965e-05, - "loss": 0.0092, - "step": 2258 - }, - { - "epoch": 5.98, - "learning_rate": 2.0388739946380696e-05, - "loss": 0.3231, - "step": 2259 - }, - { - "epoch": 5.98, - "learning_rate": 2.037533512064343e-05, - "loss": 0.0084, - "step": 2260 - }, - { - "epoch": 5.98, - "learning_rate": 2.0361930294906167e-05, - "loss": 0.0517, - "step": 2261 - }, - { - "epoch": 5.98, - "learning_rate": 2.0348525469168902e-05, - "loss": 0.0012, - "step": 2262 - }, - { - "epoch": 5.99, - "learning_rate": 2.0335120643431637e-05, - "loss": 0.0045, - "step": 2263 - }, - { - "epoch": 5.99, - "learning_rate": 2.032171581769437e-05, - "loss": 0.0012, - "step": 2264 - }, - { - "epoch": 5.99, - "learning_rate": 2.0308310991957104e-05, - "loss": 0.0032, - "step": 2265 - }, - { - "epoch": 5.99, - "learning_rate": 2.029490616621984e-05, - "loss": 0.0038, - "step": 2266 - }, - { - "epoch": 6.0, - "learning_rate": 2.0281501340482574e-05, - "loss": 0.2731, - "step": 2267 - }, - { - "epoch": 6.0, - "learning_rate": 2.026809651474531e-05, - "loss": 0.3122, - "step": 2268 - }, - { - "epoch": 6.0, - "eval_f1": 0.7762762762762763, - "eval_loss": 1.0977023839950562, - "eval_runtime": 1.8624, - "eval_samples_per_second": 812.383, - "eval_steps_per_second": 51.009, - "step": 2268 - }, - { - "epoch": 6.0, - "learning_rate": 2.0254691689008044e-05, - "loss": 0.0035, - "step": 2269 - }, - { - "epoch": 6.01, - "learning_rate": 2.024128686327078e-05, - "loss": 0.0709, - "step": 2270 - }, - { - "epoch": 6.01, - "learning_rate": 2.0227882037533515e-05, - "loss": 0.1548, - "step": 2271 - }, - { - "epoch": 6.01, - "learning_rate": 2.021447721179625e-05, - "loss": 0.0035, - "step": 2272 - }, - { - "epoch": 6.01, - "learning_rate": 2.0201072386058985e-05, - "loss": 0.0009, - "step": 2273 - }, - { - "epoch": 6.02, - "learning_rate": 2.0187667560321717e-05, - "loss": 0.0061, - "step": 2274 - }, - { - "epoch": 6.02, - "learning_rate": 2.0174262734584452e-05, - "loss": 0.3586, - "step": 2275 - }, - { - "epoch": 6.02, - "learning_rate": 2.0160857908847187e-05, - "loss": 0.0019, - "step": 2276 - }, - { - "epoch": 6.02, - "learning_rate": 2.0147453083109922e-05, - "loss": 0.0077, - "step": 2277 - }, - { - "epoch": 6.03, - "learning_rate": 2.0134048257372657e-05, - "loss": 0.0022, - "step": 2278 - }, - { - "epoch": 6.03, - "learning_rate": 2.012064343163539e-05, - "loss": 0.0021, - "step": 2279 - }, - { - "epoch": 6.03, - "learning_rate": 2.0107238605898124e-05, - "loss": 0.0022, - "step": 2280 - }, - { - "epoch": 6.03, - "learning_rate": 2.009383378016086e-05, - "loss": 0.1387, - "step": 2281 - }, - { - "epoch": 6.04, - "learning_rate": 2.0080428954423594e-05, - "loss": 0.0034, - "step": 2282 - }, - { - "epoch": 6.04, - "learning_rate": 2.006702412868633e-05, - "loss": 0.0128, - "step": 2283 - }, - { - "epoch": 6.04, - "learning_rate": 2.005361930294906e-05, - "loss": 0.0201, - "step": 2284 - }, - { - "epoch": 6.04, - "learning_rate": 2.0040214477211796e-05, - "loss": 0.0072, - "step": 2285 - }, - { - "epoch": 6.05, - "learning_rate": 2.002680965147453e-05, - "loss": 0.0031, - "step": 2286 - }, - { - "epoch": 6.05, - "learning_rate": 2.0013404825737267e-05, - "loss": 0.0273, - "step": 2287 - }, - { - "epoch": 6.05, - "learning_rate": 2e-05, - "loss": 0.0679, - "step": 2288 - }, - { - "epoch": 6.06, - "learning_rate": 1.9986595174262733e-05, - "loss": 0.0012, - "step": 2289 - }, - { - "epoch": 6.06, - "learning_rate": 1.997319034852547e-05, - "loss": 0.0045, - "step": 2290 - }, - { - "epoch": 6.06, - "learning_rate": 1.9959785522788204e-05, - "loss": 0.0289, - "step": 2291 - }, - { - "epoch": 6.06, - "learning_rate": 1.994638069705094e-05, - "loss": 0.1421, - "step": 2292 - }, - { - "epoch": 6.07, - "learning_rate": 1.9932975871313674e-05, - "loss": 0.0007, - "step": 2293 - }, - { - "epoch": 6.07, - "learning_rate": 1.9919571045576406e-05, - "loss": 0.0037, - "step": 2294 - }, - { - "epoch": 6.07, - "learning_rate": 1.990616621983914e-05, - "loss": 0.1566, - "step": 2295 - }, - { - "epoch": 6.07, - "learning_rate": 1.9892761394101876e-05, - "loss": 0.0008, - "step": 2296 - }, - { - "epoch": 6.08, - "learning_rate": 1.987935656836461e-05, - "loss": 0.0008, - "step": 2297 - }, - { - "epoch": 6.08, - "learning_rate": 1.9865951742627346e-05, - "loss": 0.1319, - "step": 2298 - }, - { - "epoch": 6.08, - "learning_rate": 1.985254691689008e-05, - "loss": 0.0007, - "step": 2299 - }, - { - "epoch": 6.08, - "learning_rate": 1.9839142091152816e-05, - "loss": 0.0048, - "step": 2300 - }, - { - "epoch": 6.09, - "learning_rate": 1.982573726541555e-05, - "loss": 0.223, - "step": 2301 - }, - { - "epoch": 6.09, - "learning_rate": 1.9812332439678287e-05, - "loss": 0.2188, - "step": 2302 - }, - { - "epoch": 6.09, - "learning_rate": 1.9798927613941022e-05, - "loss": 0.0209, - "step": 2303 - }, - { - "epoch": 6.1, - "learning_rate": 1.9785522788203754e-05, - "loss": 0.001, - "step": 2304 - }, - { - "epoch": 6.1, - "learning_rate": 1.977211796246649e-05, - "loss": 0.0006, - "step": 2305 - }, - { - "epoch": 6.1, - "learning_rate": 1.9758713136729224e-05, - "loss": 0.0011, - "step": 2306 - }, - { - "epoch": 6.1, - "learning_rate": 1.974530831099196e-05, - "loss": 0.0008, - "step": 2307 - }, - { - "epoch": 6.11, - "learning_rate": 1.9731903485254694e-05, - "loss": 0.0015, - "step": 2308 - }, - { - "epoch": 6.11, - "learning_rate": 1.9718498659517426e-05, - "loss": 0.0024, - "step": 2309 - }, - { - "epoch": 6.11, - "learning_rate": 1.970509383378016e-05, - "loss": 0.0006, - "step": 2310 - }, - { - "epoch": 6.11, - "learning_rate": 1.9691689008042896e-05, - "loss": 0.0007, - "step": 2311 - }, - { - "epoch": 6.12, - "learning_rate": 1.967828418230563e-05, - "loss": 0.0026, - "step": 2312 - }, - { - "epoch": 6.12, - "learning_rate": 1.9664879356568366e-05, - "loss": 0.0019, - "step": 2313 - }, - { - "epoch": 6.12, - "learning_rate": 1.9651474530831098e-05, - "loss": 0.0055, - "step": 2314 - }, - { - "epoch": 6.12, - "learning_rate": 1.9638069705093833e-05, - "loss": 0.0023, - "step": 2315 - }, - { - "epoch": 6.13, - "learning_rate": 1.962466487935657e-05, - "loss": 0.0011, - "step": 2316 - }, - { - "epoch": 6.13, - "learning_rate": 1.9611260053619303e-05, - "loss": 0.0407, - "step": 2317 - }, - { - "epoch": 6.13, - "learning_rate": 1.959785522788204e-05, - "loss": 0.046, - "step": 2318 - }, - { - "epoch": 6.13, - "learning_rate": 1.958445040214477e-05, - "loss": 0.0062, - "step": 2319 - }, - { - "epoch": 6.14, - "learning_rate": 1.9571045576407505e-05, - "loss": 0.0007, - "step": 2320 - }, - { - "epoch": 6.14, - "learning_rate": 1.955764075067024e-05, - "loss": 0.3377, - "step": 2321 - }, - { - "epoch": 6.14, - "learning_rate": 1.9544235924932976e-05, - "loss": 0.4294, - "step": 2322 - }, - { - "epoch": 6.15, - "learning_rate": 1.953083109919571e-05, - "loss": 0.0016, - "step": 2323 - }, - { - "epoch": 6.15, - "learning_rate": 1.9517426273458446e-05, - "loss": 0.0283, - "step": 2324 - }, - { - "epoch": 6.15, - "learning_rate": 1.950402144772118e-05, - "loss": 0.0005, - "step": 2325 - }, - { - "epoch": 6.15, - "learning_rate": 1.9490616621983916e-05, - "loss": 0.0011, - "step": 2326 - }, - { - "epoch": 6.16, - "learning_rate": 1.947721179624665e-05, - "loss": 0.0237, - "step": 2327 - }, - { - "epoch": 6.16, - "learning_rate": 1.9463806970509387e-05, - "loss": 0.0581, - "step": 2328 - }, - { - "epoch": 6.16, - "learning_rate": 1.945040214477212e-05, - "loss": 0.0905, - "step": 2329 - }, - { - "epoch": 6.16, - "learning_rate": 1.9436997319034853e-05, - "loss": 0.0012, - "step": 2330 - }, - { - "epoch": 6.17, - "learning_rate": 1.942359249329759e-05, - "loss": 0.0053, - "step": 2331 - }, - { - "epoch": 6.17, - "learning_rate": 1.9410187667560324e-05, - "loss": 0.0225, - "step": 2332 - }, - { - "epoch": 6.17, - "learning_rate": 1.939678284182306e-05, - "loss": 0.0374, - "step": 2333 - }, - { - "epoch": 6.17, - "learning_rate": 1.9383378016085794e-05, - "loss": 0.0006, - "step": 2334 - }, - { - "epoch": 6.18, - "learning_rate": 1.9369973190348526e-05, - "loss": 0.0008, - "step": 2335 - }, - { - "epoch": 6.18, - "learning_rate": 1.935656836461126e-05, - "loss": 0.0006, - "step": 2336 - }, - { - "epoch": 6.18, - "learning_rate": 1.9343163538873996e-05, - "loss": 0.0011, - "step": 2337 - }, - { - "epoch": 6.19, - "learning_rate": 1.932975871313673e-05, - "loss": 0.0014, - "step": 2338 - }, - { - "epoch": 6.19, - "learning_rate": 1.9316353887399466e-05, - "loss": 0.0006, - "step": 2339 - }, - { - "epoch": 6.19, - "learning_rate": 1.9302949061662198e-05, - "loss": 0.0504, - "step": 2340 - }, - { - "epoch": 6.19, - "learning_rate": 1.9289544235924933e-05, - "loss": 0.005, - "step": 2341 - }, - { - "epoch": 6.2, - "learning_rate": 1.9276139410187668e-05, - "loss": 0.2673, - "step": 2342 - }, - { - "epoch": 6.2, - "learning_rate": 1.9262734584450403e-05, - "loss": 0.2173, - "step": 2343 - }, - { - "epoch": 6.2, - "learning_rate": 1.924932975871314e-05, - "loss": 0.0235, - "step": 2344 - }, - { - "epoch": 6.2, - "learning_rate": 1.923592493297587e-05, - "loss": 0.0092, - "step": 2345 - }, - { - "epoch": 6.21, - "learning_rate": 1.9222520107238605e-05, - "loss": 0.0088, - "step": 2346 - }, - { - "epoch": 6.21, - "learning_rate": 1.920911528150134e-05, - "loss": 0.0506, - "step": 2347 - }, - { - "epoch": 6.21, - "learning_rate": 1.9195710455764076e-05, - "loss": 0.0033, - "step": 2348 - }, - { - "epoch": 6.21, - "learning_rate": 1.918230563002681e-05, - "loss": 0.0006, - "step": 2349 - }, - { - "epoch": 6.22, - "learning_rate": 1.9168900804289542e-05, - "loss": 0.0021, - "step": 2350 - }, - { - "epoch": 6.22, - "learning_rate": 1.9155495978552278e-05, - "loss": 0.0006, - "step": 2351 - }, - { - "epoch": 6.22, - "learning_rate": 1.9142091152815013e-05, - "loss": 0.0041, - "step": 2352 - }, - { - "epoch": 6.22, - "learning_rate": 1.9128686327077748e-05, - "loss": 0.0209, - "step": 2353 - }, - { - "epoch": 6.23, - "learning_rate": 1.9115281501340483e-05, - "loss": 0.0145, - "step": 2354 - }, - { - "epoch": 6.23, - "learning_rate": 1.9101876675603218e-05, - "loss": 0.0303, - "step": 2355 - }, - { - "epoch": 6.23, - "learning_rate": 1.9088471849865953e-05, - "loss": 0.0951, - "step": 2356 - }, - { - "epoch": 6.24, - "learning_rate": 1.907506702412869e-05, - "loss": 0.042, - "step": 2357 - }, - { - "epoch": 6.24, - "learning_rate": 1.9061662198391424e-05, - "loss": 0.0009, - "step": 2358 - }, - { - "epoch": 6.24, - "learning_rate": 1.904825737265416e-05, - "loss": 0.0006, - "step": 2359 - }, - { - "epoch": 6.24, - "learning_rate": 1.903485254691689e-05, - "loss": 0.0057, - "step": 2360 - }, - { - "epoch": 6.25, - "learning_rate": 1.9021447721179626e-05, - "loss": 0.0578, - "step": 2361 - }, - { - "epoch": 6.25, - "learning_rate": 1.900804289544236e-05, - "loss": 0.3295, - "step": 2362 - }, - { - "epoch": 6.25, - "learning_rate": 1.8994638069705096e-05, - "loss": 0.0005, - "step": 2363 - }, - { - "epoch": 6.25, - "learning_rate": 1.898123324396783e-05, - "loss": 0.0009, - "step": 2364 - }, - { - "epoch": 6.26, - "learning_rate": 1.8967828418230563e-05, - "loss": 0.0036, - "step": 2365 - }, - { - "epoch": 6.26, - "learning_rate": 1.8954423592493298e-05, - "loss": 0.0029, - "step": 2366 - }, - { - "epoch": 6.26, - "learning_rate": 1.8941018766756033e-05, - "loss": 0.1468, - "step": 2367 - }, - { - "epoch": 6.26, - "learning_rate": 1.8927613941018768e-05, - "loss": 0.3163, - "step": 2368 - }, - { - "epoch": 6.27, - "learning_rate": 1.8914209115281503e-05, - "loss": 0.169, - "step": 2369 - }, - { - "epoch": 6.27, - "learning_rate": 1.8900804289544235e-05, - "loss": 0.0493, - "step": 2370 - }, - { - "epoch": 6.27, - "learning_rate": 1.888739946380697e-05, - "loss": 0.0003, - "step": 2371 - }, - { - "epoch": 6.28, - "learning_rate": 1.8873994638069705e-05, - "loss": 0.0015, - "step": 2372 - }, - { - "epoch": 6.28, - "learning_rate": 1.886058981233244e-05, - "loss": 0.258, - "step": 2373 - }, - { - "epoch": 6.28, - "learning_rate": 1.8847184986595175e-05, - "loss": 0.002, - "step": 2374 - }, - { - "epoch": 6.28, - "learning_rate": 1.8833780160857907e-05, - "loss": 0.0008, - "step": 2375 - }, - { - "epoch": 6.29, - "learning_rate": 1.8820375335120642e-05, - "loss": 0.0003, - "step": 2376 - }, - { - "epoch": 6.29, - "learning_rate": 1.8806970509383377e-05, - "loss": 0.0014, - "step": 2377 - }, - { - "epoch": 6.29, - "learning_rate": 1.8793565683646113e-05, - "loss": 0.0068, - "step": 2378 - }, - { - "epoch": 6.29, - "learning_rate": 1.8780160857908848e-05, - "loss": 0.39, - "step": 2379 - }, - { - "epoch": 6.3, - "learning_rate": 1.8766756032171583e-05, - "loss": 0.0046, - "step": 2380 - }, - { - "epoch": 6.3, - "learning_rate": 1.8753351206434318e-05, - "loss": 0.0008, - "step": 2381 - }, - { - "epoch": 6.3, - "learning_rate": 1.8739946380697053e-05, - "loss": 0.0007, - "step": 2382 - }, - { - "epoch": 6.3, - "learning_rate": 1.8726541554959788e-05, - "loss": 0.0013, - "step": 2383 - }, - { - "epoch": 6.31, - "learning_rate": 1.8713136729222523e-05, - "loss": 0.0055, - "step": 2384 - }, - { - "epoch": 6.31, - "learning_rate": 1.869973190348526e-05, - "loss": 0.0014, - "step": 2385 - }, - { - "epoch": 6.31, - "learning_rate": 1.868632707774799e-05, - "loss": 0.0039, - "step": 2386 - }, - { - "epoch": 6.31, - "learning_rate": 1.8672922252010725e-05, - "loss": 0.0995, - "step": 2387 - }, - { - "epoch": 6.32, - "learning_rate": 1.865951742627346e-05, - "loss": 0.0015, - "step": 2388 - }, - { - "epoch": 6.32, - "learning_rate": 1.8646112600536196e-05, - "loss": 0.083, - "step": 2389 - }, - { - "epoch": 6.32, - "learning_rate": 1.863270777479893e-05, - "loss": 0.0044, - "step": 2390 - }, - { - "epoch": 6.33, - "learning_rate": 1.8619302949061662e-05, - "loss": 0.0105, - "step": 2391 - }, - { - "epoch": 6.33, - "learning_rate": 1.8605898123324398e-05, - "loss": 0.0005, - "step": 2392 - }, - { - "epoch": 6.33, - "learning_rate": 1.8592493297587133e-05, - "loss": 0.0014, - "step": 2393 - }, - { - "epoch": 6.33, - "learning_rate": 1.8579088471849868e-05, - "loss": 0.0114, - "step": 2394 - }, - { - "epoch": 6.34, - "learning_rate": 1.8565683646112603e-05, - "loss": 0.0334, - "step": 2395 - }, - { - "epoch": 6.34, - "learning_rate": 1.8552278820375335e-05, - "loss": 0.006, - "step": 2396 - }, - { - "epoch": 6.34, - "learning_rate": 1.853887399463807e-05, - "loss": 0.3124, - "step": 2397 - }, - { - "epoch": 6.34, - "learning_rate": 1.8525469168900805e-05, - "loss": 0.2324, - "step": 2398 - }, - { - "epoch": 6.35, - "learning_rate": 1.851206434316354e-05, - "loss": 0.0889, - "step": 2399 - }, - { - "epoch": 6.35, - "learning_rate": 1.8498659517426275e-05, - "loss": 0.0705, - "step": 2400 - }, - { - "epoch": 6.35, - "learning_rate": 1.8485254691689007e-05, - "loss": 0.0012, - "step": 2401 - }, - { - "epoch": 6.35, - "learning_rate": 1.8471849865951742e-05, - "loss": 0.0033, - "step": 2402 - }, - { - "epoch": 6.36, - "learning_rate": 1.8458445040214477e-05, - "loss": 0.0021, - "step": 2403 - }, - { - "epoch": 6.36, - "learning_rate": 1.8445040214477212e-05, - "loss": 0.0005, - "step": 2404 - }, - { - "epoch": 6.36, - "learning_rate": 1.8431635388739948e-05, - "loss": 0.0011, - "step": 2405 - }, - { - "epoch": 6.37, - "learning_rate": 1.841823056300268e-05, - "loss": 0.0006, - "step": 2406 - }, - { - "epoch": 6.37, - "learning_rate": 1.8404825737265414e-05, - "loss": 0.0726, - "step": 2407 - }, - { - "epoch": 6.37, - "learning_rate": 1.839142091152815e-05, - "loss": 0.0009, - "step": 2408 - }, - { - "epoch": 6.37, - "learning_rate": 1.8378016085790885e-05, - "loss": 0.0007, - "step": 2409 - }, - { - "epoch": 6.38, - "learning_rate": 1.836461126005362e-05, - "loss": 0.0859, - "step": 2410 - }, - { - "epoch": 6.38, - "learning_rate": 1.8351206434316355e-05, - "loss": 0.0011, - "step": 2411 - }, - { - "epoch": 6.38, - "learning_rate": 1.833780160857909e-05, - "loss": 0.6542, - "step": 2412 - }, - { - "epoch": 6.38, - "learning_rate": 1.8324396782841825e-05, - "loss": 0.2733, - "step": 2413 - }, - { - "epoch": 6.39, - "learning_rate": 1.831099195710456e-05, - "loss": 0.2825, - "step": 2414 - }, - { - "epoch": 6.39, - "learning_rate": 1.8297587131367295e-05, - "loss": 0.0012, - "step": 2415 - }, - { - "epoch": 6.39, - "learning_rate": 1.8284182305630027e-05, - "loss": 0.1404, - "step": 2416 - }, - { - "epoch": 6.39, - "learning_rate": 1.8270777479892762e-05, - "loss": 0.0006, - "step": 2417 - }, - { - "epoch": 6.4, - "learning_rate": 1.8257372654155497e-05, - "loss": 0.0007, - "step": 2418 - }, - { - "epoch": 6.4, - "learning_rate": 1.8243967828418233e-05, - "loss": 0.1429, - "step": 2419 - }, - { - "epoch": 6.4, - "learning_rate": 1.8230563002680968e-05, - "loss": 0.0008, - "step": 2420 - }, - { - "epoch": 6.4, - "learning_rate": 1.82171581769437e-05, - "loss": 0.0062, - "step": 2421 - }, - { - "epoch": 6.41, - "learning_rate": 1.8203753351206435e-05, - "loss": 0.0071, - "step": 2422 - }, - { - "epoch": 6.41, - "learning_rate": 1.819034852546917e-05, - "loss": 0.0017, - "step": 2423 - }, - { - "epoch": 6.41, - "learning_rate": 1.8176943699731905e-05, - "loss": 0.084, - "step": 2424 - }, - { - "epoch": 6.42, - "learning_rate": 1.816353887399464e-05, - "loss": 0.0011, - "step": 2425 - }, - { - "epoch": 6.42, - "learning_rate": 1.8150134048257372e-05, - "loss": 0.0255, - "step": 2426 - }, - { - "epoch": 6.42, - "learning_rate": 1.8136729222520107e-05, - "loss": 0.0009, - "step": 2427 - }, - { - "epoch": 6.42, - "learning_rate": 1.8123324396782842e-05, - "loss": 0.3105, - "step": 2428 - }, - { - "epoch": 6.43, - "learning_rate": 1.8109919571045577e-05, - "loss": 0.0046, - "step": 2429 - }, - { - "epoch": 6.43, - "learning_rate": 1.8096514745308312e-05, - "loss": 0.0089, - "step": 2430 - }, - { - "epoch": 6.43, - "learning_rate": 1.8083109919571044e-05, - "loss": 0.1176, - "step": 2431 - }, - { - "epoch": 6.43, - "learning_rate": 1.806970509383378e-05, - "loss": 0.0235, - "step": 2432 - }, - { - "epoch": 6.44, - "learning_rate": 1.8056300268096514e-05, - "loss": 0.029, - "step": 2433 - }, - { - "epoch": 6.44, - "learning_rate": 1.804289544235925e-05, - "loss": 0.0013, - "step": 2434 - }, - { - "epoch": 6.44, - "learning_rate": 1.8029490616621985e-05, - "loss": 0.0075, - "step": 2435 - }, - { - "epoch": 6.44, - "learning_rate": 1.801608579088472e-05, - "loss": 0.1744, - "step": 2436 - }, - { - "epoch": 6.45, - "learning_rate": 1.8002680965147455e-05, - "loss": 0.0017, - "step": 2437 - }, - { - "epoch": 6.45, - "learning_rate": 1.798927613941019e-05, - "loss": 0.0188, - "step": 2438 - }, - { - "epoch": 6.45, - "learning_rate": 1.7975871313672925e-05, - "loss": 0.0232, - "step": 2439 - }, - { - "epoch": 6.46, - "learning_rate": 1.796246648793566e-05, - "loss": 0.1459, - "step": 2440 - }, - { - "epoch": 6.46, - "learning_rate": 1.7949061662198392e-05, - "loss": 0.0007, - "step": 2441 - }, - { - "epoch": 6.46, - "learning_rate": 1.7935656836461127e-05, - "loss": 0.0005, - "step": 2442 - }, - { - "epoch": 6.46, - "learning_rate": 1.7922252010723862e-05, - "loss": 0.0012, - "step": 2443 - }, - { - "epoch": 6.47, - "learning_rate": 1.7908847184986597e-05, - "loss": 0.0041, - "step": 2444 - }, - { - "epoch": 6.47, - "learning_rate": 1.7895442359249332e-05, - "loss": 0.4884, - "step": 2445 - }, - { - "epoch": 6.47, - "learning_rate": 1.7882037533512068e-05, - "loss": 0.0017, - "step": 2446 - }, - { - "epoch": 6.47, - "learning_rate": 1.78686327077748e-05, - "loss": 0.0566, - "step": 2447 - }, - { - "epoch": 6.48, - "learning_rate": 1.7855227882037534e-05, - "loss": 0.012, - "step": 2448 - }, - { - "epoch": 6.48, - "learning_rate": 1.784182305630027e-05, - "loss": 0.001, - "step": 2449 - }, - { - "epoch": 6.48, - "learning_rate": 1.7828418230563005e-05, - "loss": 0.0028, - "step": 2450 - }, - { - "epoch": 6.48, - "learning_rate": 1.781501340482574e-05, - "loss": 0.4622, - "step": 2451 - }, - { - "epoch": 6.49, - "learning_rate": 1.780160857908847e-05, - "loss": 0.0042, - "step": 2452 - }, - { - "epoch": 6.49, - "learning_rate": 1.7788203753351207e-05, - "loss": 0.0176, - "step": 2453 - }, - { - "epoch": 6.49, - "learning_rate": 1.7774798927613942e-05, - "loss": 0.0012, - "step": 2454 - }, - { - "epoch": 6.49, - "learning_rate": 1.7761394101876677e-05, - "loss": 0.0344, - "step": 2455 - }, - { - "epoch": 6.5, - "learning_rate": 1.7747989276139412e-05, - "loss": 0.1278, - "step": 2456 - }, - { - "epoch": 6.5, - "learning_rate": 1.7734584450402144e-05, - "loss": 0.0017, - "step": 2457 - }, - { - "epoch": 6.5, - "learning_rate": 1.772117962466488e-05, - "loss": 0.0044, - "step": 2458 - }, - { - "epoch": 6.51, - "learning_rate": 1.7707774798927614e-05, - "loss": 0.0016, - "step": 2459 - }, - { - "epoch": 6.51, - "learning_rate": 1.769436997319035e-05, - "loss": 0.0799, - "step": 2460 - }, - { - "epoch": 6.51, - "learning_rate": 1.7680965147453084e-05, - "loss": 0.0066, - "step": 2461 - }, - { - "epoch": 6.51, - "learning_rate": 1.7667560321715816e-05, - "loss": 0.1607, - "step": 2462 - }, - { - "epoch": 6.52, - "learning_rate": 1.765415549597855e-05, - "loss": 0.0742, - "step": 2463 - }, - { - "epoch": 6.52, - "learning_rate": 1.7640750670241286e-05, - "loss": 0.0005, - "step": 2464 - }, - { - "epoch": 6.52, - "learning_rate": 1.762734584450402e-05, - "loss": 0.0006, - "step": 2465 - }, - { - "epoch": 6.52, - "learning_rate": 1.7613941018766757e-05, - "loss": 0.0019, - "step": 2466 - }, - { - "epoch": 6.53, - "learning_rate": 1.7600536193029492e-05, - "loss": 0.0009, - "step": 2467 - }, - { - "epoch": 6.53, - "learning_rate": 1.7587131367292227e-05, - "loss": 0.0023, - "step": 2468 - }, - { - "epoch": 6.53, - "learning_rate": 1.7573726541554962e-05, - "loss": 0.0202, - "step": 2469 - }, - { - "epoch": 6.53, - "learning_rate": 1.7560321715817697e-05, - "loss": 0.0223, - "step": 2470 - }, - { - "epoch": 6.54, - "learning_rate": 1.7546916890080432e-05, - "loss": 0.0009, - "step": 2471 - }, - { - "epoch": 6.54, - "learning_rate": 1.7533512064343164e-05, - "loss": 0.0014, - "step": 2472 - }, - { - "epoch": 6.54, - "learning_rate": 1.75201072386059e-05, - "loss": 0.0514, - "step": 2473 - }, - { - "epoch": 6.54, - "learning_rate": 1.7506702412868634e-05, - "loss": 0.0013, - "step": 2474 - }, - { - "epoch": 6.55, - "learning_rate": 1.749329758713137e-05, - "loss": 0.0087, - "step": 2475 - }, - { - "epoch": 6.55, - "learning_rate": 1.7479892761394105e-05, - "loss": 0.0035, - "step": 2476 - }, - { - "epoch": 6.55, - "learning_rate": 1.7466487935656836e-05, - "loss": 0.0397, - "step": 2477 - }, - { - "epoch": 6.56, - "learning_rate": 1.745308310991957e-05, - "loss": 0.0021, - "step": 2478 - }, - { - "epoch": 6.56, - "learning_rate": 1.7439678284182307e-05, - "loss": 0.052, - "step": 2479 - }, - { - "epoch": 6.56, - "learning_rate": 1.742627345844504e-05, - "loss": 0.0027, - "step": 2480 - }, - { - "epoch": 6.56, - "learning_rate": 1.7412868632707777e-05, - "loss": 0.001, - "step": 2481 - }, - { - "epoch": 6.57, - "learning_rate": 1.739946380697051e-05, - "loss": 0.2899, - "step": 2482 - }, - { - "epoch": 6.57, - "learning_rate": 1.7386058981233244e-05, - "loss": 0.0007, - "step": 2483 - }, - { - "epoch": 6.57, - "learning_rate": 1.737265415549598e-05, - "loss": 0.0704, - "step": 2484 - }, - { - "epoch": 6.57, - "learning_rate": 1.7359249329758714e-05, - "loss": 0.001, - "step": 2485 - }, - { - "epoch": 6.58, - "learning_rate": 1.734584450402145e-05, - "loss": 0.057, - "step": 2486 - }, - { - "epoch": 6.58, - "learning_rate": 1.733243967828418e-05, - "loss": 0.0002, - "step": 2487 - }, - { - "epoch": 6.58, - "learning_rate": 1.7319034852546916e-05, - "loss": 0.0064, - "step": 2488 - }, - { - "epoch": 6.58, - "learning_rate": 1.730563002680965e-05, - "loss": 0.0638, - "step": 2489 - }, - { - "epoch": 6.59, - "learning_rate": 1.7292225201072386e-05, - "loss": 0.0006, - "step": 2490 - }, - { - "epoch": 6.59, - "learning_rate": 1.727882037533512e-05, - "loss": 0.0142, - "step": 2491 - }, - { - "epoch": 6.59, - "learning_rate": 1.7265415549597856e-05, - "loss": 0.0015, - "step": 2492 - }, - { - "epoch": 6.6, - "learning_rate": 1.725201072386059e-05, - "loss": 0.0949, - "step": 2493 - }, - { - "epoch": 6.6, - "learning_rate": 1.7238605898123327e-05, - "loss": 0.0004, - "step": 2494 - }, - { - "epoch": 6.6, - "learning_rate": 1.7225201072386062e-05, - "loss": 0.0111, - "step": 2495 - }, - { - "epoch": 6.6, - "learning_rate": 1.7211796246648797e-05, - "loss": 0.0215, - "step": 2496 - }, - { - "epoch": 6.61, - "learning_rate": 1.719839142091153e-05, - "loss": 0.266, - "step": 2497 - }, - { - "epoch": 6.61, - "learning_rate": 1.7184986595174264e-05, - "loss": 0.4487, - "step": 2498 - }, - { - "epoch": 6.61, - "learning_rate": 1.7171581769437e-05, - "loss": 0.0021, - "step": 2499 - }, - { - "epoch": 6.61, - "learning_rate": 1.7158176943699734e-05, - "loss": 0.0004, - "step": 2500 - }, - { - "epoch": 6.62, - "learning_rate": 1.714477211796247e-05, - "loss": 0.0004, - "step": 2501 - }, - { - "epoch": 6.62, - "learning_rate": 1.71313672922252e-05, - "loss": 0.0011, - "step": 2502 - }, - { - "epoch": 6.62, - "learning_rate": 1.7117962466487936e-05, - "loss": 0.0006, - "step": 2503 - }, - { - "epoch": 6.62, - "learning_rate": 1.710455764075067e-05, - "loss": 0.1005, - "step": 2504 - }, - { - "epoch": 6.63, - "learning_rate": 1.7091152815013406e-05, - "loss": 0.0472, - "step": 2505 - }, - { - "epoch": 6.63, - "learning_rate": 1.707774798927614e-05, - "loss": 0.0004, - "step": 2506 - }, - { - "epoch": 6.63, - "learning_rate": 1.7064343163538877e-05, - "loss": 0.0162, - "step": 2507 - }, - { - "epoch": 6.63, - "learning_rate": 1.705093833780161e-05, - "loss": 0.004, - "step": 2508 - }, - { - "epoch": 6.64, - "learning_rate": 1.7037533512064344e-05, - "loss": 0.0007, - "step": 2509 - }, - { - "epoch": 6.64, - "learning_rate": 1.702412868632708e-05, - "loss": 0.1447, - "step": 2510 - }, - { - "epoch": 6.64, - "learning_rate": 1.7010723860589814e-05, - "loss": 0.0006, - "step": 2511 - }, - { - "epoch": 6.65, - "learning_rate": 1.699731903485255e-05, - "loss": 0.0002, - "step": 2512 - }, - { - "epoch": 6.65, - "learning_rate": 1.698391420911528e-05, - "loss": 0.0004, - "step": 2513 - }, - { - "epoch": 6.65, - "learning_rate": 1.6970509383378016e-05, - "loss": 0.0017, - "step": 2514 - }, - { - "epoch": 6.65, - "learning_rate": 1.695710455764075e-05, - "loss": 0.4581, - "step": 2515 - }, - { - "epoch": 6.66, - "learning_rate": 1.6943699731903486e-05, - "loss": 0.0005, - "step": 2516 - }, - { - "epoch": 6.66, - "learning_rate": 1.693029490616622e-05, - "loss": 0.0043, - "step": 2517 - }, - { - "epoch": 6.66, - "learning_rate": 1.6916890080428953e-05, - "loss": 0.0005, - "step": 2518 - }, - { - "epoch": 6.66, - "learning_rate": 1.6903485254691688e-05, - "loss": 0.0002, - "step": 2519 - }, - { - "epoch": 6.67, - "learning_rate": 1.6890080428954423e-05, - "loss": 0.0005, - "step": 2520 - }, - { - "epoch": 6.67, - "learning_rate": 1.687667560321716e-05, - "loss": 0.0037, - "step": 2521 - }, - { - "epoch": 6.67, - "learning_rate": 1.6863270777479893e-05, - "loss": 0.0003, - "step": 2522 - }, - { - "epoch": 6.67, - "learning_rate": 1.684986595174263e-05, - "loss": 0.0019, - "step": 2523 - }, - { - "epoch": 6.68, - "learning_rate": 1.6836461126005364e-05, - "loss": 0.0023, - "step": 2524 - }, - { - "epoch": 6.68, - "learning_rate": 1.68230563002681e-05, - "loss": 0.0004, - "step": 2525 - }, - { - "epoch": 6.68, - "learning_rate": 1.6809651474530834e-05, - "loss": 0.3317, - "step": 2526 - }, - { - "epoch": 6.69, - "learning_rate": 1.679624664879357e-05, - "loss": 0.0004, - "step": 2527 - }, - { - "epoch": 6.69, - "learning_rate": 1.67828418230563e-05, - "loss": 0.002, - "step": 2528 - }, - { - "epoch": 6.69, - "learning_rate": 1.6769436997319036e-05, - "loss": 0.0003, - "step": 2529 - }, - { - "epoch": 6.69, - "learning_rate": 1.675603217158177e-05, - "loss": 0.0007, - "step": 2530 - }, - { - "epoch": 6.7, - "learning_rate": 1.6742627345844506e-05, - "loss": 0.0012, - "step": 2531 - }, - { - "epoch": 6.7, - "learning_rate": 1.672922252010724e-05, - "loss": 0.024, - "step": 2532 - }, - { - "epoch": 6.7, - "learning_rate": 1.6715817694369973e-05, - "loss": 0.0041, - "step": 2533 - }, - { - "epoch": 6.7, - "learning_rate": 1.6702412868632708e-05, - "loss": 0.1821, - "step": 2534 - }, - { - "epoch": 6.71, - "learning_rate": 1.6689008042895443e-05, - "loss": 0.0004, - "step": 2535 - }, - { - "epoch": 6.71, - "learning_rate": 1.667560321715818e-05, - "loss": 0.0293, - "step": 2536 - }, - { - "epoch": 6.71, - "learning_rate": 1.6662198391420914e-05, - "loss": 0.0005, - "step": 2537 - }, - { - "epoch": 6.71, - "learning_rate": 1.6648793565683645e-05, - "loss": 0.0043, - "step": 2538 - }, - { - "epoch": 6.72, - "learning_rate": 1.663538873994638e-05, - "loss": 0.0112, - "step": 2539 - }, - { - "epoch": 6.72, - "learning_rate": 1.6621983914209116e-05, - "loss": 0.0003, - "step": 2540 - }, - { - "epoch": 6.72, - "learning_rate": 1.660857908847185e-05, - "loss": 0.0005, - "step": 2541 - }, - { - "epoch": 6.72, - "learning_rate": 1.6595174262734586e-05, - "loss": 0.4617, - "step": 2542 - }, - { - "epoch": 6.73, - "learning_rate": 1.6581769436997318e-05, - "loss": 0.0004, - "step": 2543 - }, - { - "epoch": 6.73, - "learning_rate": 1.6568364611260053e-05, - "loss": 0.1932, - "step": 2544 - }, - { - "epoch": 6.73, - "learning_rate": 1.6554959785522788e-05, - "loss": 0.0004, - "step": 2545 - }, - { - "epoch": 6.74, - "learning_rate": 1.6541554959785523e-05, - "loss": 0.0785, - "step": 2546 - }, - { - "epoch": 6.74, - "learning_rate": 1.6528150134048258e-05, - "loss": 0.0882, - "step": 2547 - }, - { - "epoch": 6.74, - "learning_rate": 1.651474530831099e-05, - "loss": 0.3937, - "step": 2548 - }, - { - "epoch": 6.74, - "learning_rate": 1.6501340482573725e-05, - "loss": 0.3401, - "step": 2549 - }, - { - "epoch": 6.75, - "learning_rate": 1.648793565683646e-05, - "loss": 0.026, - "step": 2550 - }, - { - "epoch": 6.75, - "learning_rate": 1.6474530831099195e-05, - "loss": 0.1959, - "step": 2551 - }, - { - "epoch": 6.75, - "learning_rate": 1.646112600536193e-05, - "loss": 0.0022, - "step": 2552 - }, - { - "epoch": 6.75, - "learning_rate": 1.6447721179624666e-05, - "loss": 0.0012, - "step": 2553 - }, - { - "epoch": 6.76, - "learning_rate": 1.64343163538874e-05, - "loss": 0.0064, - "step": 2554 - }, - { - "epoch": 6.76, - "learning_rate": 1.6420911528150136e-05, - "loss": 0.0105, - "step": 2555 - }, - { - "epoch": 6.76, - "learning_rate": 1.640750670241287e-05, - "loss": 0.0008, - "step": 2556 - }, - { - "epoch": 6.76, - "learning_rate": 1.6394101876675606e-05, - "loss": 0.0339, - "step": 2557 - }, - { - "epoch": 6.77, - "learning_rate": 1.6380697050938338e-05, - "loss": 0.1458, - "step": 2558 - }, - { - "epoch": 6.77, - "learning_rate": 1.6367292225201073e-05, - "loss": 0.2526, - "step": 2559 - }, - { - "epoch": 6.77, - "learning_rate": 1.6353887399463808e-05, - "loss": 0.038, - "step": 2560 - }, - { - "epoch": 6.78, - "learning_rate": 1.6340482573726543e-05, - "loss": 0.174, - "step": 2561 - }, - { - "epoch": 6.78, - "learning_rate": 1.632707774798928e-05, - "loss": 0.1936, - "step": 2562 - }, - { - "epoch": 6.78, - "learning_rate": 1.631367292225201e-05, - "loss": 0.0014, - "step": 2563 - }, - { - "epoch": 6.78, - "learning_rate": 1.6300268096514745e-05, - "loss": 0.0008, - "step": 2564 - }, - { - "epoch": 6.79, - "learning_rate": 1.628686327077748e-05, - "loss": 0.0616, - "step": 2565 - }, - { - "epoch": 6.79, - "learning_rate": 1.6273458445040215e-05, - "loss": 0.0029, - "step": 2566 - }, - { - "epoch": 6.79, - "learning_rate": 1.626005361930295e-05, - "loss": 0.1301, - "step": 2567 - }, - { - "epoch": 6.79, - "learning_rate": 1.6246648793565686e-05, - "loss": 0.0076, - "step": 2568 - }, - { - "epoch": 6.8, - "learning_rate": 1.6233243967828417e-05, - "loss": 0.038, - "step": 2569 - }, - { - "epoch": 6.8, - "learning_rate": 1.6219839142091153e-05, - "loss": 0.0376, - "step": 2570 - }, - { - "epoch": 6.8, - "learning_rate": 1.6206434316353888e-05, - "loss": 0.0007, - "step": 2571 - }, - { - "epoch": 6.8, - "learning_rate": 1.6193029490616623e-05, - "loss": 0.016, - "step": 2572 - }, - { - "epoch": 6.81, - "learning_rate": 1.6179624664879358e-05, - "loss": 0.0005, - "step": 2573 - }, - { - "epoch": 6.81, - "learning_rate": 1.616621983914209e-05, - "loss": 0.0024, - "step": 2574 - }, - { - "epoch": 6.81, - "learning_rate": 1.6152815013404825e-05, - "loss": 0.001, - "step": 2575 - }, - { - "epoch": 6.81, - "learning_rate": 1.613941018766756e-05, - "loss": 0.0009, - "step": 2576 - }, - { - "epoch": 6.82, - "learning_rate": 1.6126005361930295e-05, - "loss": 0.1889, - "step": 2577 - }, - { - "epoch": 6.82, - "learning_rate": 1.611260053619303e-05, - "loss": 0.5094, - "step": 2578 - }, - { - "epoch": 6.82, - "learning_rate": 1.6099195710455765e-05, - "loss": 0.017, - "step": 2579 - }, - { - "epoch": 6.83, - "learning_rate": 1.60857908847185e-05, - "loss": 0.029, - "step": 2580 - }, - { - "epoch": 6.83, - "learning_rate": 1.6072386058981236e-05, - "loss": 0.1249, - "step": 2581 - }, - { - "epoch": 6.83, - "learning_rate": 1.605898123324397e-05, - "loss": 0.2531, - "step": 2582 - }, - { - "epoch": 6.83, - "learning_rate": 1.6045576407506706e-05, - "loss": 0.0006, - "step": 2583 - }, - { - "epoch": 6.84, - "learning_rate": 1.6032171581769438e-05, - "loss": 0.0624, - "step": 2584 - }, - { - "epoch": 6.84, - "learning_rate": 1.6018766756032173e-05, - "loss": 0.0254, - "step": 2585 - }, - { - "epoch": 6.84, - "learning_rate": 1.6005361930294908e-05, - "loss": 0.0034, - "step": 2586 - }, - { - "epoch": 6.84, - "learning_rate": 1.5991957104557643e-05, - "loss": 0.0204, - "step": 2587 - }, - { - "epoch": 6.85, - "learning_rate": 1.5978552278820378e-05, - "loss": 0.002, - "step": 2588 - }, - { - "epoch": 6.85, - "learning_rate": 1.596514745308311e-05, - "loss": 0.0015, - "step": 2589 - }, - { - "epoch": 6.85, - "learning_rate": 1.5951742627345845e-05, - "loss": 0.0465, - "step": 2590 - }, - { - "epoch": 6.85, - "learning_rate": 1.593833780160858e-05, - "loss": 0.1892, - "step": 2591 - }, - { - "epoch": 6.86, - "learning_rate": 1.5924932975871315e-05, - "loss": 0.0932, - "step": 2592 - }, - { - "epoch": 6.86, - "learning_rate": 1.591152815013405e-05, - "loss": 0.0015, - "step": 2593 - }, - { - "epoch": 6.86, - "learning_rate": 1.5898123324396782e-05, - "loss": 0.0062, - "step": 2594 - }, - { - "epoch": 6.87, - "learning_rate": 1.5884718498659517e-05, - "loss": 0.0731, - "step": 2595 - }, - { - "epoch": 6.87, - "learning_rate": 1.5871313672922252e-05, - "loss": 0.002, - "step": 2596 - }, - { - "epoch": 6.87, - "learning_rate": 1.5857908847184988e-05, - "loss": 0.0484, - "step": 2597 - }, - { - "epoch": 6.87, - "learning_rate": 1.5844504021447723e-05, - "loss": 0.0082, - "step": 2598 - }, - { - "epoch": 6.88, - "learning_rate": 1.5831099195710454e-05, - "loss": 0.0213, - "step": 2599 - }, - { - "epoch": 6.88, - "learning_rate": 1.581769436997319e-05, - "loss": 0.1612, - "step": 2600 - }, - { - "epoch": 6.88, - "learning_rate": 1.5804289544235925e-05, - "loss": 0.184, - "step": 2601 - }, - { - "epoch": 6.88, - "learning_rate": 1.579088471849866e-05, - "loss": 0.1413, - "step": 2602 - }, - { - "epoch": 6.89, - "learning_rate": 1.5777479892761395e-05, - "loss": 0.0019, - "step": 2603 - }, - { - "epoch": 6.89, - "learning_rate": 1.5764075067024127e-05, - "loss": 0.0047, - "step": 2604 - }, - { - "epoch": 6.89, - "learning_rate": 1.5750670241286862e-05, - "loss": 0.0409, - "step": 2605 - }, - { - "epoch": 6.89, - "learning_rate": 1.5737265415549597e-05, - "loss": 0.0379, - "step": 2606 - }, - { - "epoch": 6.9, - "learning_rate": 1.5723860589812332e-05, - "loss": 0.0005, - "step": 2607 - }, - { - "epoch": 6.9, - "learning_rate": 1.5710455764075067e-05, - "loss": 0.0332, - "step": 2608 - }, - { - "epoch": 6.9, - "learning_rate": 1.5697050938337802e-05, - "loss": 0.0543, - "step": 2609 - }, - { - "epoch": 6.9, - "learning_rate": 1.5683646112600538e-05, - "loss": 0.0009, - "step": 2610 - }, - { - "epoch": 6.91, - "learning_rate": 1.5670241286863273e-05, - "loss": 0.016, - "step": 2611 - }, - { - "epoch": 6.91, - "learning_rate": 1.5656836461126008e-05, - "loss": 0.0035, - "step": 2612 - }, - { - "epoch": 6.91, - "learning_rate": 1.5643431635388743e-05, - "loss": 0.0713, - "step": 2613 - }, - { - "epoch": 6.92, - "learning_rate": 1.5630026809651475e-05, - "loss": 0.0022, - "step": 2614 - }, - { - "epoch": 6.92, - "learning_rate": 1.561662198391421e-05, - "loss": 0.0005, - "step": 2615 - }, - { - "epoch": 6.92, - "learning_rate": 1.5603217158176945e-05, - "loss": 0.0009, - "step": 2616 - }, - { - "epoch": 6.92, - "learning_rate": 1.558981233243968e-05, - "loss": 0.0016, - "step": 2617 - }, - { - "epoch": 6.93, - "learning_rate": 1.5576407506702415e-05, - "loss": 0.0017, - "step": 2618 - }, - { - "epoch": 6.93, - "learning_rate": 1.5563002680965147e-05, - "loss": 0.0094, - "step": 2619 - }, - { - "epoch": 6.93, - "learning_rate": 1.5549597855227882e-05, - "loss": 0.016, - "step": 2620 - }, - { - "epoch": 6.93, - "learning_rate": 1.5536193029490617e-05, - "loss": 0.0005, - "step": 2621 - }, - { - "epoch": 6.94, - "learning_rate": 1.5522788203753352e-05, - "loss": 0.0549, - "step": 2622 - }, - { - "epoch": 6.94, - "learning_rate": 1.5509383378016087e-05, - "loss": 0.3791, - "step": 2623 - }, - { - "epoch": 6.94, - "learning_rate": 1.549597855227882e-05, - "loss": 0.0003, - "step": 2624 - }, - { - "epoch": 6.94, - "learning_rate": 1.5482573726541554e-05, - "loss": 0.0774, - "step": 2625 - }, - { - "epoch": 6.95, - "learning_rate": 1.546916890080429e-05, - "loss": 0.0879, - "step": 2626 - }, - { - "epoch": 6.95, - "learning_rate": 1.5455764075067025e-05, - "loss": 0.0007, - "step": 2627 - }, - { - "epoch": 6.95, - "learning_rate": 1.544235924932976e-05, - "loss": 0.0047, - "step": 2628 - }, - { - "epoch": 6.96, - "learning_rate": 1.542895442359249e-05, - "loss": 0.0011, - "step": 2629 - }, - { - "epoch": 6.96, - "learning_rate": 1.5415549597855227e-05, - "loss": 0.0004, - "step": 2630 - }, - { - "epoch": 6.96, - "learning_rate": 1.5402144772117962e-05, - "loss": 0.4962, - "step": 2631 - }, - { - "epoch": 6.96, - "learning_rate": 1.5388739946380697e-05, - "loss": 0.1182, - "step": 2632 - }, - { - "epoch": 6.97, - "learning_rate": 1.5375335120643432e-05, - "loss": 0.0269, - "step": 2633 - }, - { - "epoch": 6.97, - "learning_rate": 1.5361930294906167e-05, - "loss": 0.0157, - "step": 2634 - }, - { - "epoch": 6.97, - "learning_rate": 1.5348525469168902e-05, - "loss": 0.0022, - "step": 2635 - }, - { - "epoch": 6.97, - "learning_rate": 1.5335120643431637e-05, - "loss": 0.3299, - "step": 2636 - }, - { - "epoch": 6.98, - "learning_rate": 1.5321715817694372e-05, - "loss": 0.0529, - "step": 2637 - }, - { - "epoch": 6.98, - "learning_rate": 1.5308310991957108e-05, - "loss": 0.1396, - "step": 2638 - }, - { - "epoch": 6.98, - "learning_rate": 1.5294906166219843e-05, - "loss": 0.0008, - "step": 2639 - }, - { - "epoch": 6.98, - "learning_rate": 1.5281501340482574e-05, - "loss": 0.0086, - "step": 2640 - }, - { - "epoch": 6.99, - "learning_rate": 1.526809651474531e-05, - "loss": 0.0036, - "step": 2641 - }, - { - "epoch": 6.99, - "learning_rate": 1.5254691689008043e-05, - "loss": 0.0149, - "step": 2642 - }, - { - "epoch": 6.99, - "learning_rate": 1.5241286863270778e-05, - "loss": 0.0011, - "step": 2643 - }, - { - "epoch": 6.99, - "learning_rate": 1.5227882037533513e-05, - "loss": 0.0003, - "step": 2644 - }, - { - "epoch": 7.0, - "learning_rate": 1.5214477211796247e-05, - "loss": 0.0064, - "step": 2645 - }, - { - "epoch": 7.0, - "learning_rate": 1.5201072386058982e-05, - "loss": 0.0281, - "step": 2646 - }, - { - "epoch": 7.0, - "eval_f1": 0.7856000000000002, - "eval_loss": 1.1071351766586304, - "eval_runtime": 1.8613, - "eval_samples_per_second": 812.89, - "eval_steps_per_second": 51.041, - "step": 2646 - }, - { - "epoch": 7.0, - "learning_rate": 1.5187667560321717e-05, - "loss": 0.0049, - "step": 2647 - }, - { - "epoch": 7.01, - "learning_rate": 1.5174262734584452e-05, - "loss": 0.001, - "step": 2648 - }, - { - "epoch": 7.01, - "learning_rate": 1.5160857908847187e-05, - "loss": 0.0004, - "step": 2649 - }, - { - "epoch": 7.01, - "learning_rate": 1.5147453083109919e-05, - "loss": 0.022, - "step": 2650 - }, - { - "epoch": 7.01, - "learning_rate": 1.5134048257372654e-05, - "loss": 0.0126, - "step": 2651 - }, - { - "epoch": 7.02, - "learning_rate": 1.512064343163539e-05, - "loss": 0.0006, - "step": 2652 - }, - { - "epoch": 7.02, - "learning_rate": 1.5107238605898124e-05, - "loss": 0.0182, - "step": 2653 - }, - { - "epoch": 7.02, - "learning_rate": 1.509383378016086e-05, - "loss": 0.0004, - "step": 2654 - }, - { - "epoch": 7.02, - "learning_rate": 1.5080428954423593e-05, - "loss": 0.2763, - "step": 2655 - }, - { - "epoch": 7.03, - "learning_rate": 1.5067024128686328e-05, - "loss": 0.0025, - "step": 2656 - }, - { - "epoch": 7.03, - "learning_rate": 1.5053619302949063e-05, - "loss": 0.0102, - "step": 2657 - }, - { - "epoch": 7.03, - "learning_rate": 1.5040214477211798e-05, - "loss": 0.008, - "step": 2658 - }, - { - "epoch": 7.03, - "learning_rate": 1.5026809651474534e-05, - "loss": 0.0005, - "step": 2659 - }, - { - "epoch": 7.04, - "learning_rate": 1.5013404825737265e-05, - "loss": 0.0006, - "step": 2660 - }, - { - "epoch": 7.04, - "learning_rate": 1.5e-05, - "loss": 0.0003, - "step": 2661 - }, - { - "epoch": 7.04, - "learning_rate": 1.4986595174262736e-05, - "loss": 0.0013, - "step": 2662 - }, - { - "epoch": 7.04, - "learning_rate": 1.497319034852547e-05, - "loss": 0.0033, - "step": 2663 - }, - { - "epoch": 7.05, - "learning_rate": 1.4959785522788206e-05, - "loss": 0.0004, - "step": 2664 - }, - { - "epoch": 7.05, - "learning_rate": 1.4946380697050938e-05, - "loss": 0.0347, - "step": 2665 - }, - { - "epoch": 7.05, - "learning_rate": 1.4932975871313673e-05, - "loss": 0.0208, - "step": 2666 - }, - { - "epoch": 7.06, - "learning_rate": 1.4919571045576408e-05, - "loss": 0.3783, - "step": 2667 - }, - { - "epoch": 7.06, - "learning_rate": 1.4906166219839143e-05, - "loss": 0.0005, - "step": 2668 - }, - { - "epoch": 7.06, - "learning_rate": 1.4892761394101878e-05, - "loss": 0.2023, - "step": 2669 - }, - { - "epoch": 7.06, - "learning_rate": 1.4879356568364611e-05, - "loss": 0.0007, - "step": 2670 - }, - { - "epoch": 7.07, - "learning_rate": 1.4865951742627347e-05, - "loss": 0.0014, - "step": 2671 - }, - { - "epoch": 7.07, - "learning_rate": 1.4852546916890082e-05, - "loss": 0.0018, - "step": 2672 - }, - { - "epoch": 7.07, - "learning_rate": 1.4839142091152817e-05, - "loss": 0.0004, - "step": 2673 - }, - { - "epoch": 7.07, - "learning_rate": 1.4825737265415552e-05, - "loss": 0.0005, - "step": 2674 - }, - { - "epoch": 7.08, - "learning_rate": 1.4812332439678284e-05, - "loss": 0.0235, - "step": 2675 - }, - { - "epoch": 7.08, - "learning_rate": 1.4798927613941019e-05, - "loss": 0.0116, - "step": 2676 - }, - { - "epoch": 7.08, - "learning_rate": 1.4785522788203754e-05, - "loss": 0.0005, - "step": 2677 - }, - { - "epoch": 7.08, - "learning_rate": 1.4772117962466489e-05, - "loss": 0.0011, - "step": 2678 - }, - { - "epoch": 7.09, - "learning_rate": 1.4758713136729224e-05, - "loss": 0.2959, - "step": 2679 - }, - { - "epoch": 7.09, - "learning_rate": 1.4745308310991956e-05, - "loss": 0.2646, - "step": 2680 - }, - { - "epoch": 7.09, - "learning_rate": 1.4731903485254691e-05, - "loss": 0.1224, - "step": 2681 - }, - { - "epoch": 7.1, - "learning_rate": 1.4718498659517426e-05, - "loss": 0.0419, - "step": 2682 - }, - { - "epoch": 7.1, - "learning_rate": 1.4705093833780161e-05, - "loss": 0.0218, - "step": 2683 - }, - { - "epoch": 7.1, - "learning_rate": 1.4691689008042897e-05, - "loss": 0.0003, - "step": 2684 - }, - { - "epoch": 7.1, - "learning_rate": 1.467828418230563e-05, - "loss": 0.0005, - "step": 2685 - }, - { - "epoch": 7.11, - "learning_rate": 1.4664879356568365e-05, - "loss": 0.2352, - "step": 2686 - }, - { - "epoch": 7.11, - "learning_rate": 1.46514745308311e-05, - "loss": 0.0004, - "step": 2687 - }, - { - "epoch": 7.11, - "learning_rate": 1.4638069705093835e-05, - "loss": 0.0004, - "step": 2688 - }, - { - "epoch": 7.11, - "learning_rate": 1.462466487935657e-05, - "loss": 0.1069, - "step": 2689 - }, - { - "epoch": 7.12, - "learning_rate": 1.4611260053619302e-05, - "loss": 0.008, - "step": 2690 - }, - { - "epoch": 7.12, - "learning_rate": 1.4597855227882037e-05, - "loss": 0.0007, - "step": 2691 - }, - { - "epoch": 7.12, - "learning_rate": 1.4584450402144772e-05, - "loss": 0.002, - "step": 2692 - }, - { - "epoch": 7.12, - "learning_rate": 1.4571045576407508e-05, - "loss": 0.0029, - "step": 2693 - }, - { - "epoch": 7.13, - "learning_rate": 1.4557640750670243e-05, - "loss": 0.0221, - "step": 2694 - }, - { - "epoch": 7.13, - "learning_rate": 1.4544235924932978e-05, - "loss": 0.0085, - "step": 2695 - }, - { - "epoch": 7.13, - "learning_rate": 1.453083109919571e-05, - "loss": 0.0009, - "step": 2696 - }, - { - "epoch": 7.13, - "learning_rate": 1.4517426273458445e-05, - "loss": 0.0018, - "step": 2697 - }, - { - "epoch": 7.14, - "learning_rate": 1.450402144772118e-05, - "loss": 0.001, - "step": 2698 - }, - { - "epoch": 7.14, - "learning_rate": 1.4490616621983915e-05, - "loss": 0.0021, - "step": 2699 - }, - { - "epoch": 7.14, - "learning_rate": 1.447721179624665e-05, - "loss": 0.0225, - "step": 2700 - }, - { - "epoch": 7.15, - "learning_rate": 1.4463806970509384e-05, - "loss": 0.0005, - "step": 2701 - }, - { - "epoch": 7.15, - "learning_rate": 1.4450402144772119e-05, - "loss": 0.0057, - "step": 2702 - }, - { - "epoch": 7.15, - "learning_rate": 1.4436997319034854e-05, - "loss": 0.0422, - "step": 2703 - }, - { - "epoch": 7.15, - "learning_rate": 1.4423592493297589e-05, - "loss": 0.0028, - "step": 2704 - }, - { - "epoch": 7.16, - "learning_rate": 1.4410187667560324e-05, - "loss": 0.0231, - "step": 2705 - }, - { - "epoch": 7.16, - "learning_rate": 1.4396782841823056e-05, - "loss": 0.1236, - "step": 2706 - }, - { - "epoch": 7.16, - "learning_rate": 1.4383378016085791e-05, - "loss": 0.0004, - "step": 2707 - }, - { - "epoch": 7.16, - "learning_rate": 1.4369973190348526e-05, - "loss": 0.0019, - "step": 2708 - }, - { - "epoch": 7.17, - "learning_rate": 1.4356568364611261e-05, - "loss": 0.0029, - "step": 2709 - }, - { - "epoch": 7.17, - "learning_rate": 1.4343163538873996e-05, - "loss": 0.0005, - "step": 2710 - }, - { - "epoch": 7.17, - "learning_rate": 1.4329758713136728e-05, - "loss": 0.0665, - "step": 2711 - }, - { - "epoch": 7.17, - "learning_rate": 1.4316353887399463e-05, - "loss": 0.0005, - "step": 2712 - }, - { - "epoch": 7.18, - "learning_rate": 1.4302949061662198e-05, - "loss": 0.0107, - "step": 2713 - }, - { - "epoch": 7.18, - "learning_rate": 1.4289544235924934e-05, - "loss": 0.0005, - "step": 2714 - }, - { - "epoch": 7.18, - "learning_rate": 1.4276139410187669e-05, - "loss": 0.1983, - "step": 2715 - }, - { - "epoch": 7.19, - "learning_rate": 1.4262734584450402e-05, - "loss": 0.0016, - "step": 2716 - }, - { - "epoch": 7.19, - "learning_rate": 1.4249329758713137e-05, - "loss": 0.0003, - "step": 2717 - }, - { - "epoch": 7.19, - "learning_rate": 1.4235924932975872e-05, - "loss": 0.0247, - "step": 2718 - }, - { - "epoch": 7.19, - "learning_rate": 1.4222520107238607e-05, - "loss": 0.0079, - "step": 2719 - }, - { - "epoch": 7.2, - "learning_rate": 1.4209115281501343e-05, - "loss": 0.0012, - "step": 2720 - }, - { - "epoch": 7.2, - "learning_rate": 1.4195710455764074e-05, - "loss": 0.0004, - "step": 2721 - }, - { - "epoch": 7.2, - "learning_rate": 1.418230563002681e-05, - "loss": 0.0004, - "step": 2722 - }, - { - "epoch": 7.2, - "learning_rate": 1.4168900804289545e-05, - "loss": 0.0051, - "step": 2723 - }, - { - "epoch": 7.21, - "learning_rate": 1.415549597855228e-05, - "loss": 0.0006, - "step": 2724 - }, - { - "epoch": 7.21, - "learning_rate": 1.4142091152815015e-05, - "loss": 0.0043, - "step": 2725 - }, - { - "epoch": 7.21, - "learning_rate": 1.4128686327077748e-05, - "loss": 0.004, - "step": 2726 - }, - { - "epoch": 7.21, - "learning_rate": 1.4115281501340483e-05, - "loss": 0.2211, - "step": 2727 - }, - { - "epoch": 7.22, - "learning_rate": 1.4101876675603219e-05, - "loss": 0.0003, - "step": 2728 - }, - { - "epoch": 7.22, - "learning_rate": 1.4088471849865954e-05, - "loss": 0.0004, - "step": 2729 - }, - { - "epoch": 7.22, - "learning_rate": 1.4075067024128689e-05, - "loss": 0.2051, - "step": 2730 - }, - { - "epoch": 7.22, - "learning_rate": 1.406166219839142e-05, - "loss": 0.0003, - "step": 2731 - }, - { - "epoch": 7.23, - "learning_rate": 1.4048257372654156e-05, - "loss": 0.0014, - "step": 2732 - }, - { - "epoch": 7.23, - "learning_rate": 1.403485254691689e-05, - "loss": 0.0007, - "step": 2733 - }, - { - "epoch": 7.23, - "learning_rate": 1.4021447721179626e-05, - "loss": 0.0068, - "step": 2734 - }, - { - "epoch": 7.24, - "learning_rate": 1.4008042895442361e-05, - "loss": 0.137, - "step": 2735 - }, - { - "epoch": 7.24, - "learning_rate": 1.3994638069705093e-05, - "loss": 0.0005, - "step": 2736 - }, - { - "epoch": 7.24, - "learning_rate": 1.3981233243967828e-05, - "loss": 0.0006, - "step": 2737 - }, - { - "epoch": 7.24, - "learning_rate": 1.3967828418230563e-05, - "loss": 0.0206, - "step": 2738 - }, - { - "epoch": 7.25, - "learning_rate": 1.3954423592493298e-05, - "loss": 0.1488, - "step": 2739 - }, - { - "epoch": 7.25, - "learning_rate": 1.3941018766756033e-05, - "loss": 0.0054, - "step": 2740 - }, - { - "epoch": 7.25, - "learning_rate": 1.3927613941018767e-05, - "loss": 0.0269, - "step": 2741 - }, - { - "epoch": 7.25, - "learning_rate": 1.3914209115281502e-05, - "loss": 0.0006, - "step": 2742 - }, - { - "epoch": 7.26, - "learning_rate": 1.3900804289544237e-05, - "loss": 0.0003, - "step": 2743 - }, - { - "epoch": 7.26, - "learning_rate": 1.3887399463806972e-05, - "loss": 0.0004, - "step": 2744 - }, - { - "epoch": 7.26, - "learning_rate": 1.3873994638069707e-05, - "loss": 0.0003, - "step": 2745 - }, - { - "epoch": 7.26, - "learning_rate": 1.3860589812332439e-05, - "loss": 0.0027, - "step": 2746 - }, - { - "epoch": 7.27, - "learning_rate": 1.3847184986595174e-05, - "loss": 0.0006, - "step": 2747 - }, - { - "epoch": 7.27, - "learning_rate": 1.383378016085791e-05, - "loss": 0.0012, - "step": 2748 - }, - { - "epoch": 7.27, - "learning_rate": 1.3820375335120644e-05, - "loss": 0.0522, - "step": 2749 - }, - { - "epoch": 7.28, - "learning_rate": 1.380697050938338e-05, - "loss": 0.0126, - "step": 2750 - }, - { - "epoch": 7.28, - "learning_rate": 1.3793565683646111e-05, - "loss": 0.0083, - "step": 2751 - }, - { - "epoch": 7.28, - "learning_rate": 1.3780160857908846e-05, - "loss": 0.074, - "step": 2752 - }, - { - "epoch": 7.28, - "learning_rate": 1.3766756032171582e-05, - "loss": 0.0002, - "step": 2753 - }, - { - "epoch": 7.29, - "learning_rate": 1.3753351206434317e-05, - "loss": 0.1009, - "step": 2754 - }, - { - "epoch": 7.29, - "learning_rate": 1.3739946380697052e-05, - "loss": 0.0021, - "step": 2755 - }, - { - "epoch": 7.29, - "learning_rate": 1.3726541554959787e-05, - "loss": 0.0082, - "step": 2756 - }, - { - "epoch": 7.29, - "learning_rate": 1.371313672922252e-05, - "loss": 0.0004, - "step": 2757 - }, - { - "epoch": 7.3, - "learning_rate": 1.3699731903485256e-05, - "loss": 0.0006, - "step": 2758 - }, - { - "epoch": 7.3, - "learning_rate": 1.368632707774799e-05, - "loss": 0.0173, - "step": 2759 - }, - { - "epoch": 7.3, - "learning_rate": 1.3672922252010726e-05, - "loss": 0.0147, - "step": 2760 - }, - { - "epoch": 7.3, - "learning_rate": 1.3659517426273461e-05, - "loss": 0.1293, - "step": 2761 - }, - { - "epoch": 7.31, - "learning_rate": 1.3646112600536193e-05, - "loss": 0.2566, - "step": 2762 - }, - { - "epoch": 7.31, - "learning_rate": 1.3632707774798928e-05, - "loss": 0.0026, - "step": 2763 - }, - { - "epoch": 7.31, - "learning_rate": 1.3619302949061663e-05, - "loss": 0.0031, - "step": 2764 - }, - { - "epoch": 7.31, - "learning_rate": 1.3605898123324398e-05, - "loss": 0.0029, - "step": 2765 - }, - { - "epoch": 7.32, - "learning_rate": 1.3592493297587133e-05, - "loss": 0.0005, - "step": 2766 - }, - { - "epoch": 7.32, - "learning_rate": 1.3579088471849865e-05, - "loss": 0.0004, - "step": 2767 - }, - { - "epoch": 7.32, - "learning_rate": 1.35656836461126e-05, - "loss": 0.0294, - "step": 2768 - }, - { - "epoch": 7.33, - "learning_rate": 1.3552278820375335e-05, - "loss": 0.0011, - "step": 2769 - }, - { - "epoch": 7.33, - "learning_rate": 1.353887399463807e-05, - "loss": 0.009, - "step": 2770 - }, - { - "epoch": 7.33, - "learning_rate": 1.3525469168900805e-05, - "loss": 0.0003, - "step": 2771 - }, - { - "epoch": 7.33, - "learning_rate": 1.3512064343163539e-05, - "loss": 0.0003, - "step": 2772 - }, - { - "epoch": 7.34, - "learning_rate": 1.3498659517426274e-05, - "loss": 0.0002, - "step": 2773 - }, - { - "epoch": 7.34, - "learning_rate": 1.348525469168901e-05, - "loss": 0.0002, - "step": 2774 - }, - { - "epoch": 7.34, - "learning_rate": 1.3471849865951744e-05, - "loss": 0.1261, - "step": 2775 - }, - { - "epoch": 7.34, - "learning_rate": 1.345844504021448e-05, - "loss": 0.0006, - "step": 2776 - }, - { - "epoch": 7.35, - "learning_rate": 1.3445040214477211e-05, - "loss": 0.0006, - "step": 2777 - }, - { - "epoch": 7.35, - "learning_rate": 1.3431635388739946e-05, - "loss": 0.0003, - "step": 2778 - }, - { - "epoch": 7.35, - "learning_rate": 1.3418230563002681e-05, - "loss": 0.0754, - "step": 2779 - }, - { - "epoch": 7.35, - "learning_rate": 1.3404825737265417e-05, - "loss": 0.0002, - "step": 2780 - }, - { - "epoch": 7.36, - "learning_rate": 1.3391420911528152e-05, - "loss": 0.0007, - "step": 2781 - }, - { - "epoch": 7.36, - "learning_rate": 1.3378016085790885e-05, - "loss": 0.0004, - "step": 2782 - }, - { - "epoch": 7.36, - "learning_rate": 1.336461126005362e-05, - "loss": 0.001, - "step": 2783 - }, - { - "epoch": 7.37, - "learning_rate": 1.3351206434316355e-05, - "loss": 0.0006, - "step": 2784 - }, - { - "epoch": 7.37, - "learning_rate": 1.333780160857909e-05, - "loss": 0.0227, - "step": 2785 - }, - { - "epoch": 7.37, - "learning_rate": 1.3324396782841826e-05, - "loss": 0.0002, - "step": 2786 - }, - { - "epoch": 7.37, - "learning_rate": 1.3310991957104557e-05, - "loss": 0.0002, - "step": 2787 - }, - { - "epoch": 7.38, - "learning_rate": 1.3297587131367293e-05, - "loss": 0.1036, - "step": 2788 - }, - { - "epoch": 7.38, - "learning_rate": 1.3284182305630028e-05, - "loss": 0.0014, - "step": 2789 - }, - { - "epoch": 7.38, - "learning_rate": 1.3270777479892763e-05, - "loss": 0.35, - "step": 2790 - }, - { - "epoch": 7.38, - "learning_rate": 1.3257372654155498e-05, - "loss": 0.0003, - "step": 2791 - }, - { - "epoch": 7.39, - "learning_rate": 1.324396782841823e-05, - "loss": 0.0182, - "step": 2792 - }, - { - "epoch": 7.39, - "learning_rate": 1.3230563002680965e-05, - "loss": 0.0038, - "step": 2793 - }, - { - "epoch": 7.39, - "learning_rate": 1.32171581769437e-05, - "loss": 0.0003, - "step": 2794 - }, - { - "epoch": 7.39, - "learning_rate": 1.3203753351206435e-05, - "loss": 0.0003, - "step": 2795 - }, - { - "epoch": 7.4, - "learning_rate": 1.319034852546917e-05, - "loss": 0.0008, - "step": 2796 - }, - { - "epoch": 7.4, - "learning_rate": 1.3176943699731904e-05, - "loss": 0.0003, - "step": 2797 - }, - { - "epoch": 7.4, - "learning_rate": 1.3163538873994639e-05, - "loss": 0.0005, - "step": 2798 - }, - { - "epoch": 7.4, - "learning_rate": 1.3150134048257374e-05, - "loss": 0.2165, - "step": 2799 - }, - { - "epoch": 7.41, - "learning_rate": 1.3136729222520109e-05, - "loss": 0.023, - "step": 2800 - }, - { - "epoch": 7.41, - "learning_rate": 1.3123324396782844e-05, - "loss": 0.0047, - "step": 2801 - }, - { - "epoch": 7.41, - "learning_rate": 1.3109919571045576e-05, - "loss": 0.1507, - "step": 2802 - }, - { - "epoch": 7.42, - "learning_rate": 1.3096514745308311e-05, - "loss": 0.2509, - "step": 2803 - }, - { - "epoch": 7.42, - "learning_rate": 1.3083109919571046e-05, - "loss": 0.0085, - "step": 2804 - }, - { - "epoch": 7.42, - "learning_rate": 1.3069705093833781e-05, - "loss": 0.2183, - "step": 2805 - }, - { - "epoch": 7.42, - "learning_rate": 1.3056300268096516e-05, - "loss": 0.0007, - "step": 2806 - }, - { - "epoch": 7.43, - "learning_rate": 1.3042895442359248e-05, - "loss": 0.0005, - "step": 2807 - }, - { - "epoch": 7.43, - "learning_rate": 1.3029490616621983e-05, - "loss": 0.1291, - "step": 2808 - }, - { - "epoch": 7.43, - "learning_rate": 1.3016085790884718e-05, - "loss": 0.1037, - "step": 2809 - }, - { - "epoch": 7.43, - "learning_rate": 1.3002680965147454e-05, - "loss": 0.0147, - "step": 2810 - }, - { - "epoch": 7.44, - "learning_rate": 1.2989276139410189e-05, - "loss": 0.0006, - "step": 2811 - }, - { - "epoch": 7.44, - "learning_rate": 1.2975871313672922e-05, - "loss": 0.0148, - "step": 2812 - }, - { - "epoch": 7.44, - "learning_rate": 1.2962466487935657e-05, - "loss": 0.0129, - "step": 2813 - }, - { - "epoch": 7.44, - "learning_rate": 1.2949061662198392e-05, - "loss": 0.0276, - "step": 2814 - }, - { - "epoch": 7.45, - "learning_rate": 1.2935656836461127e-05, - "loss": 0.0007, - "step": 2815 - }, - { - "epoch": 7.45, - "learning_rate": 1.2922252010723863e-05, - "loss": 0.0006, - "step": 2816 - }, - { - "epoch": 7.45, - "learning_rate": 1.2908847184986598e-05, - "loss": 0.0002, - "step": 2817 - }, - { - "epoch": 7.46, - "learning_rate": 1.289544235924933e-05, - "loss": 0.1274, - "step": 2818 - }, - { - "epoch": 7.46, - "learning_rate": 1.2882037533512065e-05, - "loss": 0.0009, - "step": 2819 - }, - { - "epoch": 7.46, - "learning_rate": 1.28686327077748e-05, - "loss": 0.0007, - "step": 2820 - }, - { - "epoch": 7.46, - "learning_rate": 1.2855227882037535e-05, - "loss": 0.002, - "step": 2821 - }, - { - "epoch": 7.47, - "learning_rate": 1.284182305630027e-05, - "loss": 0.0004, - "step": 2822 - }, - { - "epoch": 7.47, - "learning_rate": 1.2828418230563002e-05, - "loss": 0.0017, - "step": 2823 - }, - { - "epoch": 7.47, - "learning_rate": 1.2815013404825737e-05, - "loss": 0.001, - "step": 2824 - }, - { - "epoch": 7.47, - "learning_rate": 1.2801608579088472e-05, - "loss": 0.0106, - "step": 2825 - }, - { - "epoch": 7.48, - "learning_rate": 1.2788203753351207e-05, - "loss": 0.1158, - "step": 2826 - }, - { - "epoch": 7.48, - "learning_rate": 1.2774798927613942e-05, - "loss": 0.0004, - "step": 2827 - }, - { - "epoch": 7.48, - "learning_rate": 1.2761394101876676e-05, - "loss": 0.3214, - "step": 2828 - }, - { - "epoch": 7.48, - "learning_rate": 1.274798927613941e-05, - "loss": 0.0003, - "step": 2829 - }, - { - "epoch": 7.49, - "learning_rate": 1.2734584450402146e-05, - "loss": 0.0417, - "step": 2830 - }, - { - "epoch": 7.49, - "learning_rate": 1.2721179624664881e-05, - "loss": 0.0002, - "step": 2831 - }, - { - "epoch": 7.49, - "learning_rate": 1.2707774798927616e-05, - "loss": 0.0004, - "step": 2832 - }, - { - "epoch": 7.49, - "learning_rate": 1.2694369973190348e-05, - "loss": 0.1166, - "step": 2833 - }, - { - "epoch": 7.5, - "learning_rate": 1.2680965147453083e-05, - "loss": 0.0008, - "step": 2834 - }, - { - "epoch": 7.5, - "learning_rate": 1.2667560321715818e-05, - "loss": 0.0005, - "step": 2835 - }, - { - "epoch": 7.5, - "learning_rate": 1.2654155495978553e-05, - "loss": 0.0191, - "step": 2836 - }, - { - "epoch": 7.51, - "learning_rate": 1.2640750670241289e-05, - "loss": 0.0642, - "step": 2837 - }, - { - "epoch": 7.51, - "learning_rate": 1.262734584450402e-05, - "loss": 0.0256, - "step": 2838 - }, - { - "epoch": 7.51, - "learning_rate": 1.2613941018766755e-05, - "loss": 0.0007, - "step": 2839 - }, - { - "epoch": 7.51, - "learning_rate": 1.260053619302949e-05, - "loss": 0.049, - "step": 2840 - }, - { - "epoch": 7.52, - "learning_rate": 1.2587131367292226e-05, - "loss": 0.0012, - "step": 2841 - }, - { - "epoch": 7.52, - "learning_rate": 1.257372654155496e-05, - "loss": 0.0006, - "step": 2842 - }, - { - "epoch": 7.52, - "learning_rate": 1.2560321715817694e-05, - "loss": 0.2299, - "step": 2843 - }, - { - "epoch": 7.52, - "learning_rate": 1.254691689008043e-05, - "loss": 0.0006, - "step": 2844 - }, - { - "epoch": 7.53, - "learning_rate": 1.2533512064343164e-05, - "loss": 0.0346, - "step": 2845 - }, - { - "epoch": 7.53, - "learning_rate": 1.25201072386059e-05, - "loss": 0.0021, - "step": 2846 - }, - { - "epoch": 7.53, - "learning_rate": 1.2506702412868635e-05, - "loss": 0.0003, - "step": 2847 - }, - { - "epoch": 7.53, - "learning_rate": 1.2493297587131368e-05, - "loss": 0.135, - "step": 2848 - }, - { - "epoch": 7.54, - "learning_rate": 1.2479892761394102e-05, - "loss": 0.0003, - "step": 2849 - }, - { - "epoch": 7.54, - "learning_rate": 1.2466487935656837e-05, - "loss": 0.0005, - "step": 2850 - }, - { - "epoch": 7.54, - "learning_rate": 1.2453083109919572e-05, - "loss": 0.0005, - "step": 2851 - }, - { - "epoch": 7.54, - "learning_rate": 1.2439678284182305e-05, - "loss": 0.0007, - "step": 2852 - }, - { - "epoch": 7.55, - "learning_rate": 1.242627345844504e-05, - "loss": 0.0004, - "step": 2853 - }, - { - "epoch": 7.55, - "learning_rate": 1.2412868632707776e-05, - "loss": 0.0003, - "step": 2854 - }, - { - "epoch": 7.55, - "learning_rate": 1.239946380697051e-05, - "loss": 0.0003, - "step": 2855 - }, - { - "epoch": 7.56, - "learning_rate": 1.2386058981233246e-05, - "loss": 0.0006, - "step": 2856 - }, - { - "epoch": 7.56, - "learning_rate": 1.237265415549598e-05, - "loss": 0.0171, - "step": 2857 - }, - { - "epoch": 7.56, - "learning_rate": 1.2359249329758714e-05, - "loss": 0.1066, - "step": 2858 - }, - { - "epoch": 7.56, - "learning_rate": 1.2345844504021448e-05, - "loss": 0.0003, - "step": 2859 - }, - { - "epoch": 7.57, - "learning_rate": 1.2332439678284183e-05, - "loss": 0.1106, - "step": 2860 - }, - { - "epoch": 7.57, - "learning_rate": 1.2319034852546918e-05, - "loss": 0.0004, - "step": 2861 - }, - { - "epoch": 7.57, - "learning_rate": 1.2305630026809652e-05, - "loss": 0.0012, - "step": 2862 - }, - { - "epoch": 7.57, - "learning_rate": 1.2292225201072387e-05, - "loss": 0.0004, - "step": 2863 - }, - { - "epoch": 7.58, - "learning_rate": 1.2278820375335122e-05, - "loss": 0.0007, - "step": 2864 - }, - { - "epoch": 7.58, - "learning_rate": 1.2265415549597855e-05, - "loss": 0.0104, - "step": 2865 - }, - { - "epoch": 7.58, - "learning_rate": 1.225201072386059e-05, - "loss": 0.0003, - "step": 2866 - }, - { - "epoch": 7.58, - "learning_rate": 1.2238605898123324e-05, - "loss": 0.3976, - "step": 2867 - }, - { - "epoch": 7.59, - "learning_rate": 1.2225201072386059e-05, - "loss": 0.0003, - "step": 2868 - }, - { - "epoch": 7.59, - "learning_rate": 1.2211796246648794e-05, - "loss": 0.4433, - "step": 2869 - }, - { - "epoch": 7.59, - "learning_rate": 1.219839142091153e-05, - "loss": 0.0005, - "step": 2870 - }, - { - "epoch": 7.6, - "learning_rate": 1.2184986595174264e-05, - "loss": 0.0733, - "step": 2871 - }, - { - "epoch": 7.6, - "learning_rate": 1.2171581769436998e-05, - "loss": 0.0008, - "step": 2872 - }, - { - "epoch": 7.6, - "learning_rate": 1.2158176943699733e-05, - "loss": 0.0003, - "step": 2873 - }, - { - "epoch": 7.6, - "learning_rate": 1.2144772117962468e-05, - "loss": 0.0253, - "step": 2874 - }, - { - "epoch": 7.61, - "learning_rate": 1.2131367292225201e-05, - "loss": 0.09, - "step": 2875 - }, - { - "epoch": 7.61, - "learning_rate": 1.2117962466487937e-05, - "loss": 0.1283, - "step": 2876 - }, - { - "epoch": 7.61, - "learning_rate": 1.210455764075067e-05, - "loss": 0.0866, - "step": 2877 - }, - { - "epoch": 7.61, - "learning_rate": 1.2091152815013405e-05, - "loss": 0.0005, - "step": 2878 - }, - { - "epoch": 7.62, - "learning_rate": 1.207774798927614e-05, - "loss": 0.051, - "step": 2879 - }, - { - "epoch": 7.62, - "learning_rate": 1.2064343163538874e-05, - "loss": 0.0055, - "step": 2880 - }, - { - "epoch": 7.62, - "learning_rate": 1.2050938337801609e-05, - "loss": 0.001, - "step": 2881 - }, - { - "epoch": 7.62, - "learning_rate": 1.2037533512064344e-05, - "loss": 0.0765, - "step": 2882 - }, - { - "epoch": 7.63, - "learning_rate": 1.2024128686327079e-05, - "loss": 0.0239, - "step": 2883 - }, - { - "epoch": 7.63, - "learning_rate": 1.2010723860589814e-05, - "loss": 0.0616, - "step": 2884 - }, - { - "epoch": 7.63, - "learning_rate": 1.1997319034852548e-05, - "loss": 0.0342, - "step": 2885 - }, - { - "epoch": 7.63, - "learning_rate": 1.1983914209115283e-05, - "loss": 0.0006, - "step": 2886 - }, - { - "epoch": 7.64, - "learning_rate": 1.1970509383378016e-05, - "loss": 0.091, - "step": 2887 - }, - { - "epoch": 7.64, - "learning_rate": 1.1957104557640751e-05, - "loss": 0.0004, - "step": 2888 - }, - { - "epoch": 7.64, - "learning_rate": 1.1943699731903486e-05, - "loss": 0.0257, - "step": 2889 - }, - { - "epoch": 7.65, - "learning_rate": 1.193029490616622e-05, - "loss": 0.0422, - "step": 2890 - }, - { - "epoch": 7.65, - "learning_rate": 1.1916890080428955e-05, - "loss": 0.1861, - "step": 2891 - }, - { - "epoch": 7.65, - "learning_rate": 1.1903485254691689e-05, - "loss": 0.0003, - "step": 2892 - }, - { - "epoch": 7.65, - "learning_rate": 1.1890080428954424e-05, - "loss": 0.0678, - "step": 2893 - }, - { - "epoch": 7.66, - "learning_rate": 1.1876675603217159e-05, - "loss": 0.0005, - "step": 2894 - }, - { - "epoch": 7.66, - "learning_rate": 1.1863270777479892e-05, - "loss": 0.0234, - "step": 2895 - }, - { - "epoch": 7.66, - "learning_rate": 1.1849865951742627e-05, - "loss": 0.0007, - "step": 2896 - }, - { - "epoch": 7.66, - "learning_rate": 1.1836461126005362e-05, - "loss": 0.0963, - "step": 2897 - }, - { - "epoch": 7.67, - "learning_rate": 1.1823056300268098e-05, - "loss": 0.0132, - "step": 2898 - }, - { - "epoch": 7.67, - "learning_rate": 1.1809651474530833e-05, - "loss": 0.0019, - "step": 2899 - }, - { - "epoch": 7.67, - "learning_rate": 1.1796246648793566e-05, - "loss": 0.0219, - "step": 2900 - }, - { - "epoch": 7.67, - "learning_rate": 1.1782841823056301e-05, - "loss": 0.0062, - "step": 2901 - }, - { - "epoch": 7.68, - "learning_rate": 1.1769436997319036e-05, - "loss": 0.0003, - "step": 2902 - }, - { - "epoch": 7.68, - "learning_rate": 1.175603217158177e-05, - "loss": 0.0009, - "step": 2903 - }, - { - "epoch": 7.68, - "learning_rate": 1.1742627345844505e-05, - "loss": 0.1446, - "step": 2904 - }, - { - "epoch": 7.69, - "learning_rate": 1.1729222520107238e-05, - "loss": 0.0103, - "step": 2905 - }, - { - "epoch": 7.69, - "learning_rate": 1.1715817694369974e-05, - "loss": 0.0004, - "step": 2906 - }, - { - "epoch": 7.69, - "learning_rate": 1.1702412868632709e-05, - "loss": 0.2502, - "step": 2907 - }, - { - "epoch": 7.69, - "learning_rate": 1.1689008042895442e-05, - "loss": 0.0005, - "step": 2908 - }, - { - "epoch": 7.7, - "learning_rate": 1.1675603217158177e-05, - "loss": 0.0001, - "step": 2909 - }, - { - "epoch": 7.7, - "learning_rate": 1.166219839142091e-05, - "loss": 0.0928, - "step": 2910 - }, - { - "epoch": 7.7, - "learning_rate": 1.1648793565683646e-05, - "loss": 0.0195, - "step": 2911 - }, - { - "epoch": 7.7, - "learning_rate": 1.1635388739946381e-05, - "loss": 0.0727, - "step": 2912 - }, - { - "epoch": 7.71, - "learning_rate": 1.1621983914209116e-05, - "loss": 0.0778, - "step": 2913 - }, - { - "epoch": 7.71, - "learning_rate": 1.1608579088471851e-05, - "loss": 0.1304, - "step": 2914 - }, - { - "epoch": 7.71, - "learning_rate": 1.1595174262734585e-05, - "loss": 0.0002, - "step": 2915 - }, - { - "epoch": 7.71, - "learning_rate": 1.158176943699732e-05, - "loss": 0.0003, - "step": 2916 - }, - { - "epoch": 7.72, - "learning_rate": 1.1568364611260055e-05, - "loss": 0.0137, - "step": 2917 - }, - { - "epoch": 7.72, - "learning_rate": 1.1554959785522788e-05, - "loss": 0.0003, - "step": 2918 - }, - { - "epoch": 7.72, - "learning_rate": 1.1541554959785523e-05, - "loss": 0.0018, - "step": 2919 - }, - { - "epoch": 7.72, - "learning_rate": 1.1528150134048257e-05, - "loss": 0.0057, - "step": 2920 - }, - { - "epoch": 7.73, - "learning_rate": 1.1514745308310992e-05, - "loss": 0.0003, - "step": 2921 - }, - { - "epoch": 7.73, - "learning_rate": 1.1501340482573727e-05, - "loss": 0.0015, - "step": 2922 - }, - { - "epoch": 7.73, - "learning_rate": 1.148793565683646e-05, - "loss": 0.0004, - "step": 2923 - }, - { - "epoch": 7.74, - "learning_rate": 1.1474530831099196e-05, - "loss": 0.0005, - "step": 2924 - }, - { - "epoch": 7.74, - "learning_rate": 1.1461126005361931e-05, - "loss": 0.0345, - "step": 2925 - }, - { - "epoch": 7.74, - "learning_rate": 1.1447721179624666e-05, - "loss": 0.0878, - "step": 2926 - }, - { - "epoch": 7.74, - "learning_rate": 1.1434316353887401e-05, - "loss": 0.0003, - "step": 2927 - }, - { - "epoch": 7.75, - "learning_rate": 1.1420911528150135e-05, - "loss": 0.0732, - "step": 2928 - }, - { - "epoch": 7.75, - "learning_rate": 1.140750670241287e-05, - "loss": 0.0005, - "step": 2929 - }, - { - "epoch": 7.75, - "learning_rate": 1.1394101876675605e-05, - "loss": 0.001, - "step": 2930 - }, - { - "epoch": 7.75, - "learning_rate": 1.1380697050938338e-05, - "loss": 0.0038, - "step": 2931 - }, - { - "epoch": 7.76, - "learning_rate": 1.1367292225201073e-05, - "loss": 0.0056, - "step": 2932 - }, - { - "epoch": 7.76, - "learning_rate": 1.1353887399463807e-05, - "loss": 0.1057, - "step": 2933 - }, - { - "epoch": 7.76, - "learning_rate": 1.1340482573726542e-05, - "loss": 0.0005, - "step": 2934 - }, - { - "epoch": 7.76, - "learning_rate": 1.1327077747989277e-05, - "loss": 0.0419, - "step": 2935 - }, - { - "epoch": 7.77, - "learning_rate": 1.131367292225201e-05, - "loss": 0.0304, - "step": 2936 - }, - { - "epoch": 7.77, - "learning_rate": 1.1300268096514746e-05, - "loss": 0.0002, - "step": 2937 - }, - { - "epoch": 7.77, - "learning_rate": 1.1286863270777479e-05, - "loss": 0.0332, - "step": 2938 - }, - { - "epoch": 7.78, - "learning_rate": 1.1273458445040214e-05, - "loss": 0.1015, - "step": 2939 - }, - { - "epoch": 7.78, - "learning_rate": 1.126005361930295e-05, - "loss": 0.0008, - "step": 2940 - }, - { - "epoch": 7.78, - "learning_rate": 1.1246648793565684e-05, - "loss": 0.0273, - "step": 2941 - }, - { - "epoch": 7.78, - "learning_rate": 1.123324396782842e-05, - "loss": 0.0022, - "step": 2942 - }, - { - "epoch": 7.79, - "learning_rate": 1.1219839142091153e-05, - "loss": 0.0009, - "step": 2943 - }, - { - "epoch": 7.79, - "learning_rate": 1.1206434316353888e-05, - "loss": 0.0003, - "step": 2944 - }, - { - "epoch": 7.79, - "learning_rate": 1.1193029490616623e-05, - "loss": 0.0057, - "step": 2945 - }, - { - "epoch": 7.79, - "learning_rate": 1.1179624664879357e-05, - "loss": 0.0014, - "step": 2946 - }, - { - "epoch": 7.8, - "learning_rate": 1.1166219839142092e-05, - "loss": 0.0009, - "step": 2947 - }, - { - "epoch": 7.8, - "learning_rate": 1.1152815013404825e-05, - "loss": 0.0019, - "step": 2948 - }, - { - "epoch": 7.8, - "learning_rate": 1.113941018766756e-05, - "loss": 0.0005, - "step": 2949 - }, - { - "epoch": 7.8, - "learning_rate": 1.1126005361930296e-05, - "loss": 0.0181, - "step": 2950 - }, - { - "epoch": 7.81, - "learning_rate": 1.1112600536193029e-05, - "loss": 0.001, - "step": 2951 - }, - { - "epoch": 7.81, - "learning_rate": 1.1099195710455764e-05, - "loss": 0.0331, - "step": 2952 - }, - { - "epoch": 7.81, - "learning_rate": 1.10857908847185e-05, - "loss": 0.0003, - "step": 2953 - }, - { - "epoch": 7.81, - "learning_rate": 1.1072386058981234e-05, - "loss": 0.0002, - "step": 2954 - }, - { - "epoch": 7.82, - "learning_rate": 1.105898123324397e-05, - "loss": 0.0305, - "step": 2955 - }, - { - "epoch": 7.82, - "learning_rate": 1.1045576407506703e-05, - "loss": 0.0023, - "step": 2956 - }, - { - "epoch": 7.82, - "learning_rate": 1.1032171581769438e-05, - "loss": 0.0359, - "step": 2957 - }, - { - "epoch": 7.83, - "learning_rate": 1.1018766756032173e-05, - "loss": 0.1075, - "step": 2958 - }, - { - "epoch": 7.83, - "learning_rate": 1.1005361930294907e-05, - "loss": 0.023, - "step": 2959 - }, - { - "epoch": 7.83, - "learning_rate": 1.0991957104557642e-05, - "loss": 0.1425, - "step": 2960 - }, - { - "epoch": 7.83, - "learning_rate": 1.0978552278820375e-05, - "loss": 0.4114, - "step": 2961 - }, - { - "epoch": 7.84, - "learning_rate": 1.096514745308311e-05, - "loss": 0.0003, - "step": 2962 - }, - { - "epoch": 7.84, - "learning_rate": 1.0951742627345846e-05, - "loss": 0.2824, - "step": 2963 - }, - { - "epoch": 7.84, - "learning_rate": 1.0938337801608579e-05, - "loss": 0.0002, - "step": 2964 - }, - { - "epoch": 7.84, - "learning_rate": 1.0924932975871314e-05, - "loss": 0.0004, - "step": 2965 - }, - { - "epoch": 7.85, - "learning_rate": 1.0911528150134048e-05, - "loss": 0.0002, - "step": 2966 - }, - { - "epoch": 7.85, - "learning_rate": 1.0898123324396783e-05, - "loss": 0.0003, - "step": 2967 - }, - { - "epoch": 7.85, - "learning_rate": 1.0884718498659518e-05, - "loss": 0.0003, - "step": 2968 - }, - { - "epoch": 7.85, - "learning_rate": 1.0871313672922253e-05, - "loss": 0.2122, - "step": 2969 - }, - { - "epoch": 7.86, - "learning_rate": 1.0857908847184988e-05, - "loss": 0.0002, - "step": 2970 - }, - { - "epoch": 7.86, - "learning_rate": 1.0844504021447721e-05, - "loss": 0.0003, - "step": 2971 - }, - { - "epoch": 7.86, - "learning_rate": 1.0831099195710457e-05, - "loss": 0.0002, - "step": 2972 - }, - { - "epoch": 7.87, - "learning_rate": 1.0817694369973192e-05, - "loss": 0.0002, - "step": 2973 - }, - { - "epoch": 7.87, - "learning_rate": 1.0804289544235925e-05, - "loss": 0.001, - "step": 2974 - }, - { - "epoch": 7.87, - "learning_rate": 1.079088471849866e-05, - "loss": 0.0002, - "step": 2975 - }, - { - "epoch": 7.87, - "learning_rate": 1.0777479892761394e-05, - "loss": 0.0004, - "step": 2976 - }, - { - "epoch": 7.88, - "learning_rate": 1.0764075067024129e-05, - "loss": 0.0003, - "step": 2977 - }, - { - "epoch": 7.88, - "learning_rate": 1.0750670241286864e-05, - "loss": 0.0003, - "step": 2978 - }, - { - "epoch": 7.88, - "learning_rate": 1.0737265415549597e-05, - "loss": 0.336, - "step": 2979 - }, - { - "epoch": 7.88, - "learning_rate": 1.0723860589812333e-05, - "loss": 0.0003, - "step": 2980 - }, - { - "epoch": 7.89, - "learning_rate": 1.0710455764075068e-05, - "loss": 0.0017, - "step": 2981 - }, - { - "epoch": 7.89, - "learning_rate": 1.0697050938337803e-05, - "loss": 0.1716, - "step": 2982 - }, - { - "epoch": 7.89, - "learning_rate": 1.0683646112600538e-05, - "loss": 0.0004, - "step": 2983 - }, - { - "epoch": 7.89, - "learning_rate": 1.0670241286863271e-05, - "loss": 0.0003, - "step": 2984 - }, - { - "epoch": 7.9, - "learning_rate": 1.0656836461126007e-05, - "loss": 0.1927, - "step": 2985 - }, - { - "epoch": 7.9, - "learning_rate": 1.064343163538874e-05, - "loss": 0.0003, - "step": 2986 - }, - { - "epoch": 7.9, - "learning_rate": 1.0630026809651475e-05, - "loss": 0.0002, - "step": 2987 - }, - { - "epoch": 7.9, - "learning_rate": 1.061662198391421e-05, - "loss": 0.2357, - "step": 2988 - }, - { - "epoch": 7.91, - "learning_rate": 1.0603217158176944e-05, - "loss": 0.464, - "step": 2989 - }, - { - "epoch": 7.91, - "learning_rate": 1.0589812332439679e-05, - "loss": 0.0015, - "step": 2990 - }, - { - "epoch": 7.91, - "learning_rate": 1.0576407506702414e-05, - "loss": 0.0792, - "step": 2991 - }, - { - "epoch": 7.92, - "learning_rate": 1.0563002680965147e-05, - "loss": 0.101, - "step": 2992 - }, - { - "epoch": 7.92, - "learning_rate": 1.0549597855227882e-05, - "loss": 0.0093, - "step": 2993 - }, - { - "epoch": 7.92, - "learning_rate": 1.0536193029490616e-05, - "loss": 0.0007, - "step": 2994 - }, - { - "epoch": 7.92, - "learning_rate": 1.0522788203753351e-05, - "loss": 0.0016, - "step": 2995 - }, - { - "epoch": 7.93, - "learning_rate": 1.0509383378016086e-05, - "loss": 0.0008, - "step": 2996 - }, - { - "epoch": 7.93, - "learning_rate": 1.0495978552278821e-05, - "loss": 0.0047, - "step": 2997 - }, - { - "epoch": 7.93, - "learning_rate": 1.0482573726541556e-05, - "loss": 0.0171, - "step": 2998 - }, - { - "epoch": 7.93, - "learning_rate": 1.046916890080429e-05, - "loss": 0.3023, - "step": 2999 - }, - { - "epoch": 7.94, - "learning_rate": 1.0455764075067025e-05, - "loss": 0.0011, - "step": 3000 - }, - { - "epoch": 7.94, - "learning_rate": 1.044235924932976e-05, - "loss": 0.0816, - "step": 3001 - }, - { - "epoch": 7.94, - "learning_rate": 1.0428954423592494e-05, - "loss": 0.0025, - "step": 3002 - }, - { - "epoch": 7.94, - "learning_rate": 1.0415549597855229e-05, - "loss": 0.0094, - "step": 3003 - }, - { - "epoch": 7.95, - "learning_rate": 1.0402144772117962e-05, - "loss": 0.0644, - "step": 3004 - }, - { - "epoch": 7.95, - "learning_rate": 1.0388739946380697e-05, - "loss": 0.3261, - "step": 3005 - }, - { - "epoch": 7.95, - "learning_rate": 1.0375335120643432e-05, - "loss": 0.1332, - "step": 3006 - }, - { - "epoch": 7.96, - "learning_rate": 1.0361930294906166e-05, - "loss": 0.0067, - "step": 3007 - }, - { - "epoch": 7.96, - "learning_rate": 1.0348525469168901e-05, - "loss": 0.0008, - "step": 3008 - }, - { - "epoch": 7.96, - "learning_rate": 1.0335120643431636e-05, - "loss": 0.174, - "step": 3009 - }, - { - "epoch": 7.96, - "learning_rate": 1.0321715817694371e-05, - "loss": 0.0005, - "step": 3010 - }, - { - "epoch": 7.97, - "learning_rate": 1.0308310991957106e-05, - "loss": 0.0505, - "step": 3011 - }, - { - "epoch": 7.97, - "learning_rate": 1.029490616621984e-05, - "loss": 0.0016, - "step": 3012 - }, - { - "epoch": 7.97, - "learning_rate": 1.0281501340482575e-05, - "loss": 0.1172, - "step": 3013 - }, - { - "epoch": 7.97, - "learning_rate": 1.0268096514745308e-05, - "loss": 0.0268, - "step": 3014 - }, - { - "epoch": 7.98, - "learning_rate": 1.0254691689008044e-05, - "loss": 0.0269, - "step": 3015 - }, - { - "epoch": 7.98, - "learning_rate": 1.0241286863270779e-05, - "loss": 0.0867, - "step": 3016 - }, - { - "epoch": 7.98, - "learning_rate": 1.0227882037533512e-05, - "loss": 0.1145, - "step": 3017 - }, - { - "epoch": 7.98, - "learning_rate": 1.0214477211796247e-05, - "loss": 0.0035, - "step": 3018 - }, - { - "epoch": 7.99, - "learning_rate": 1.0201072386058982e-05, - "loss": 0.0035, - "step": 3019 - }, - { - "epoch": 7.99, - "learning_rate": 1.0187667560321716e-05, - "loss": 0.0003, - "step": 3020 - }, - { - "epoch": 7.99, - "learning_rate": 1.0174262734584451e-05, - "loss": 0.14, - "step": 3021 - }, - { - "epoch": 7.99, - "learning_rate": 1.0160857908847184e-05, - "loss": 0.1619, - "step": 3022 - }, - { - "epoch": 8.0, - "learning_rate": 1.014745308310992e-05, - "loss": 0.0006, - "step": 3023 - }, - { - "epoch": 8.0, - "learning_rate": 1.0134048257372655e-05, - "loss": 0.0004, - "step": 3024 - }, - { - "epoch": 8.0, - "eval_f1": 0.7734138972809668, - "eval_loss": 1.2510614395141602, - "eval_runtime": 1.9043, - "eval_samples_per_second": 794.53, - "eval_steps_per_second": 49.888, - "step": 3024 - }, - { - "epoch": 8.0, - "learning_rate": 1.012064343163539e-05, - "loss": 0.0008, - "step": 3025 - }, - { - "epoch": 8.01, - "learning_rate": 1.0107238605898125e-05, - "loss": 0.0308, - "step": 3026 - }, - { - "epoch": 8.01, - "learning_rate": 1.0093833780160858e-05, - "loss": 0.1509, - "step": 3027 - }, - { - "epoch": 8.01, - "learning_rate": 1.0080428954423593e-05, - "loss": 0.0256, - "step": 3028 - }, - { - "epoch": 8.01, - "learning_rate": 1.0067024128686329e-05, - "loss": 0.0013, - "step": 3029 - }, - { - "epoch": 8.02, - "learning_rate": 1.0053619302949062e-05, - "loss": 0.0002, - "step": 3030 - }, - { - "epoch": 8.02, - "learning_rate": 1.0040214477211797e-05, - "loss": 0.0615, - "step": 3031 - }, - { - "epoch": 8.02, - "learning_rate": 1.002680965147453e-05, - "loss": 0.072, - "step": 3032 - }, - { - "epoch": 8.02, - "learning_rate": 1.0013404825737266e-05, - "loss": 0.0311, - "step": 3033 - }, - { - "epoch": 8.03, - "learning_rate": 1e-05, - "loss": 0.0115, - "step": 3034 - }, - { - "epoch": 8.03, - "learning_rate": 9.986595174262734e-06, - "loss": 0.0016, - "step": 3035 - }, - { - "epoch": 8.03, - "learning_rate": 9.97319034852547e-06, - "loss": 0.0006, - "step": 3036 - }, - { - "epoch": 8.03, - "learning_rate": 9.959785522788203e-06, - "loss": 0.0009, - "step": 3037 - }, - { - "epoch": 8.04, - "learning_rate": 9.946380697050938e-06, - "loss": 0.0002, - "step": 3038 - }, - { - "epoch": 8.04, - "learning_rate": 9.932975871313673e-06, - "loss": 0.0312, - "step": 3039 - }, - { - "epoch": 8.04, - "learning_rate": 9.919571045576408e-06, - "loss": 0.0007, - "step": 3040 - }, - { - "epoch": 8.04, - "learning_rate": 9.906166219839143e-06, - "loss": 0.0168, - "step": 3041 - }, - { - "epoch": 8.05, - "learning_rate": 9.892761394101877e-06, - "loss": 0.1056, - "step": 3042 - }, - { - "epoch": 8.05, - "learning_rate": 9.879356568364612e-06, - "loss": 0.0005, - "step": 3043 - }, - { - "epoch": 8.05, - "learning_rate": 9.865951742627347e-06, - "loss": 0.0003, - "step": 3044 - }, - { - "epoch": 8.06, - "learning_rate": 9.85254691689008e-06, - "loss": 0.0407, - "step": 3045 - }, - { - "epoch": 8.06, - "learning_rate": 9.839142091152816e-06, - "loss": 0.0013, - "step": 3046 - }, - { - "epoch": 8.06, - "learning_rate": 9.825737265415549e-06, - "loss": 0.033, - "step": 3047 - }, - { - "epoch": 8.06, - "learning_rate": 9.812332439678284e-06, - "loss": 0.0007, - "step": 3048 - }, - { - "epoch": 8.07, - "learning_rate": 9.79892761394102e-06, - "loss": 0.0356, - "step": 3049 - }, - { - "epoch": 8.07, - "learning_rate": 9.785522788203753e-06, - "loss": 0.0062, - "step": 3050 - }, - { - "epoch": 8.07, - "learning_rate": 9.772117962466488e-06, - "loss": 0.0005, - "step": 3051 - }, - { - "epoch": 8.07, - "learning_rate": 9.758713136729223e-06, - "loss": 0.0133, - "step": 3052 - }, - { - "epoch": 8.08, - "learning_rate": 9.745308310991958e-06, - "loss": 0.0022, - "step": 3053 - }, - { - "epoch": 8.08, - "learning_rate": 9.731903485254693e-06, - "loss": 0.0063, - "step": 3054 - }, - { - "epoch": 8.08, - "learning_rate": 9.718498659517427e-06, - "loss": 0.3304, - "step": 3055 - }, - { - "epoch": 8.08, - "learning_rate": 9.705093833780162e-06, - "loss": 0.0004, - "step": 3056 - }, - { - "epoch": 8.09, - "learning_rate": 9.691689008042897e-06, - "loss": 0.0335, - "step": 3057 - }, - { - "epoch": 8.09, - "learning_rate": 9.67828418230563e-06, - "loss": 0.1251, - "step": 3058 - }, - { - "epoch": 8.09, - "learning_rate": 9.664879356568366e-06, - "loss": 0.0011, - "step": 3059 - }, - { - "epoch": 8.1, - "learning_rate": 9.651474530831099e-06, - "loss": 0.0003, - "step": 3060 - }, - { - "epoch": 8.1, - "learning_rate": 9.638069705093834e-06, - "loss": 0.2906, - "step": 3061 - }, - { - "epoch": 8.1, - "learning_rate": 9.62466487935657e-06, - "loss": 0.0009, - "step": 3062 - }, - { - "epoch": 8.1, - "learning_rate": 9.611260053619303e-06, - "loss": 0.0005, - "step": 3063 - }, - { - "epoch": 8.11, - "learning_rate": 9.597855227882038e-06, - "loss": 0.0107, - "step": 3064 - }, - { - "epoch": 8.11, - "learning_rate": 9.584450402144771e-06, - "loss": 0.0594, - "step": 3065 - }, - { - "epoch": 8.11, - "learning_rate": 9.571045576407506e-06, - "loss": 0.0927, - "step": 3066 - }, - { - "epoch": 8.11, - "learning_rate": 9.557640750670241e-06, - "loss": 0.1164, - "step": 3067 - }, - { - "epoch": 8.12, - "learning_rate": 9.544235924932977e-06, - "loss": 0.0002, - "step": 3068 - }, - { - "epoch": 8.12, - "learning_rate": 9.530831099195712e-06, - "loss": 0.0004, - "step": 3069 - }, - { - "epoch": 8.12, - "learning_rate": 9.517426273458445e-06, - "loss": 0.0004, - "step": 3070 - }, - { - "epoch": 8.12, - "learning_rate": 9.50402144772118e-06, - "loss": 0.0128, - "step": 3071 - }, - { - "epoch": 8.13, - "learning_rate": 9.490616621983915e-06, - "loss": 0.0004, - "step": 3072 - }, - { - "epoch": 8.13, - "learning_rate": 9.477211796246649e-06, - "loss": 0.0003, - "step": 3073 - }, - { - "epoch": 8.13, - "learning_rate": 9.463806970509384e-06, - "loss": 0.0311, - "step": 3074 - }, - { - "epoch": 8.13, - "learning_rate": 9.450402144772117e-06, - "loss": 0.0204, - "step": 3075 - }, - { - "epoch": 8.14, - "learning_rate": 9.436997319034853e-06, - "loss": 0.0026, - "step": 3076 - }, - { - "epoch": 8.14, - "learning_rate": 9.423592493297588e-06, - "loss": 0.0008, - "step": 3077 - }, - { - "epoch": 8.14, - "learning_rate": 9.410187667560321e-06, - "loss": 0.1434, - "step": 3078 - }, - { - "epoch": 8.15, - "learning_rate": 9.396782841823056e-06, - "loss": 0.0005, - "step": 3079 - }, - { - "epoch": 8.15, - "learning_rate": 9.383378016085791e-06, - "loss": 0.0003, - "step": 3080 - }, - { - "epoch": 8.15, - "learning_rate": 9.369973190348527e-06, - "loss": 0.0002, - "step": 3081 - }, - { - "epoch": 8.15, - "learning_rate": 9.356568364611262e-06, - "loss": 0.0003, - "step": 3082 - }, - { - "epoch": 8.16, - "learning_rate": 9.343163538873995e-06, - "loss": 0.0476, - "step": 3083 - }, - { - "epoch": 8.16, - "learning_rate": 9.32975871313673e-06, - "loss": 0.0002, - "step": 3084 - }, - { - "epoch": 8.16, - "learning_rate": 9.316353887399465e-06, - "loss": 0.0004, - "step": 3085 - }, - { - "epoch": 8.16, - "learning_rate": 9.302949061662199e-06, - "loss": 0.0004, - "step": 3086 - }, - { - "epoch": 8.17, - "learning_rate": 9.289544235924934e-06, - "loss": 0.195, - "step": 3087 - }, - { - "epoch": 8.17, - "learning_rate": 9.276139410187667e-06, - "loss": 0.0258, - "step": 3088 - }, - { - "epoch": 8.17, - "learning_rate": 9.262734584450403e-06, - "loss": 0.0003, - "step": 3089 - }, - { - "epoch": 8.17, - "learning_rate": 9.249329758713138e-06, - "loss": 0.0582, - "step": 3090 - }, - { - "epoch": 8.18, - "learning_rate": 9.235924932975871e-06, - "loss": 0.0192, - "step": 3091 - }, - { - "epoch": 8.18, - "learning_rate": 9.222520107238606e-06, - "loss": 0.2512, - "step": 3092 - }, - { - "epoch": 8.18, - "learning_rate": 9.20911528150134e-06, - "loss": 0.0361, - "step": 3093 - }, - { - "epoch": 8.19, - "learning_rate": 9.195710455764075e-06, - "loss": 0.0003, - "step": 3094 - }, - { - "epoch": 8.19, - "learning_rate": 9.18230563002681e-06, - "loss": 0.0004, - "step": 3095 - }, - { - "epoch": 8.19, - "learning_rate": 9.168900804289545e-06, - "loss": 0.0002, - "step": 3096 - }, - { - "epoch": 8.19, - "learning_rate": 9.15549597855228e-06, - "loss": 0.0005, - "step": 3097 - }, - { - "epoch": 8.2, - "learning_rate": 9.142091152815014e-06, - "loss": 0.0427, - "step": 3098 - }, - { - "epoch": 8.2, - "learning_rate": 9.128686327077749e-06, - "loss": 0.0295, - "step": 3099 - }, - { - "epoch": 8.2, - "learning_rate": 9.115281501340484e-06, - "loss": 0.0144, - "step": 3100 - }, - { - "epoch": 8.2, - "learning_rate": 9.101876675603217e-06, - "loss": 0.001, - "step": 3101 - }, - { - "epoch": 8.21, - "learning_rate": 9.088471849865952e-06, - "loss": 0.0004, - "step": 3102 - }, - { - "epoch": 8.21, - "learning_rate": 9.075067024128686e-06, - "loss": 0.0005, - "step": 3103 - }, - { - "epoch": 8.21, - "learning_rate": 9.061662198391421e-06, - "loss": 0.1232, - "step": 3104 - }, - { - "epoch": 8.21, - "learning_rate": 9.048257372654156e-06, - "loss": 0.0004, - "step": 3105 - }, - { - "epoch": 8.22, - "learning_rate": 9.03485254691689e-06, - "loss": 0.004, - "step": 3106 - }, - { - "epoch": 8.22, - "learning_rate": 9.021447721179625e-06, - "loss": 0.0002, - "step": 3107 - }, - { - "epoch": 8.22, - "learning_rate": 9.00804289544236e-06, - "loss": 0.0067, - "step": 3108 - }, - { - "epoch": 8.22, - "learning_rate": 8.994638069705095e-06, - "loss": 0.0003, - "step": 3109 - }, - { - "epoch": 8.23, - "learning_rate": 8.98123324396783e-06, - "loss": 0.0344, - "step": 3110 - }, - { - "epoch": 8.23, - "learning_rate": 8.967828418230564e-06, - "loss": 0.0005, - "step": 3111 - }, - { - "epoch": 8.23, - "learning_rate": 8.954423592493299e-06, - "loss": 0.0029, - "step": 3112 - }, - { - "epoch": 8.24, - "learning_rate": 8.941018766756034e-06, - "loss": 0.0002, - "step": 3113 - }, - { - "epoch": 8.24, - "learning_rate": 8.927613941018767e-06, - "loss": 0.0003, - "step": 3114 - }, - { - "epoch": 8.24, - "learning_rate": 8.914209115281502e-06, - "loss": 0.0002, - "step": 3115 - }, - { - "epoch": 8.24, - "learning_rate": 8.900804289544236e-06, - "loss": 0.0197, - "step": 3116 - }, - { - "epoch": 8.25, - "learning_rate": 8.887399463806971e-06, - "loss": 0.0002, - "step": 3117 - }, - { - "epoch": 8.25, - "learning_rate": 8.873994638069706e-06, - "loss": 0.0003, - "step": 3118 - }, - { - "epoch": 8.25, - "learning_rate": 8.86058981233244e-06, - "loss": 0.097, - "step": 3119 - }, - { - "epoch": 8.25, - "learning_rate": 8.847184986595175e-06, - "loss": 0.0014, - "step": 3120 - }, - { - "epoch": 8.26, - "learning_rate": 8.833780160857908e-06, - "loss": 0.0004, - "step": 3121 - }, - { - "epoch": 8.26, - "learning_rate": 8.820375335120643e-06, - "loss": 0.0005, - "step": 3122 - }, - { - "epoch": 8.26, - "learning_rate": 8.806970509383378e-06, - "loss": 0.0814, - "step": 3123 - }, - { - "epoch": 8.26, - "learning_rate": 8.793565683646113e-06, - "loss": 0.0024, - "step": 3124 - }, - { - "epoch": 8.27, - "learning_rate": 8.780160857908849e-06, - "loss": 0.0003, - "step": 3125 - }, - { - "epoch": 8.27, - "learning_rate": 8.766756032171582e-06, - "loss": 0.0001, - "step": 3126 - }, - { - "epoch": 8.27, - "learning_rate": 8.753351206434317e-06, - "loss": 0.0003, - "step": 3127 - }, - { - "epoch": 8.28, - "learning_rate": 8.739946380697052e-06, - "loss": 0.3459, - "step": 3128 - }, - { - "epoch": 8.28, - "learning_rate": 8.726541554959786e-06, - "loss": 0.0639, - "step": 3129 - }, - { - "epoch": 8.28, - "learning_rate": 8.71313672922252e-06, - "loss": 0.001, - "step": 3130 - }, - { - "epoch": 8.28, - "learning_rate": 8.699731903485254e-06, - "loss": 0.0073, - "step": 3131 - }, - { - "epoch": 8.29, - "learning_rate": 8.68632707774799e-06, - "loss": 0.0002, - "step": 3132 - }, - { - "epoch": 8.29, - "learning_rate": 8.672922252010725e-06, - "loss": 0.0008, - "step": 3133 - }, - { - "epoch": 8.29, - "learning_rate": 8.659517426273458e-06, - "loss": 0.0001, - "step": 3134 - }, - { - "epoch": 8.29, - "learning_rate": 8.646112600536193e-06, - "loss": 0.0002, - "step": 3135 - }, - { - "epoch": 8.3, - "learning_rate": 8.632707774798928e-06, - "loss": 0.0028, - "step": 3136 - }, - { - "epoch": 8.3, - "learning_rate": 8.619302949061663e-06, - "loss": 0.02, - "step": 3137 - }, - { - "epoch": 8.3, - "learning_rate": 8.605898123324398e-06, - "loss": 0.046, - "step": 3138 - }, - { - "epoch": 8.3, - "learning_rate": 8.592493297587132e-06, - "loss": 0.0002, - "step": 3139 - }, - { - "epoch": 8.31, - "learning_rate": 8.579088471849867e-06, - "loss": 0.0002, - "step": 3140 - }, - { - "epoch": 8.31, - "learning_rate": 8.5656836461126e-06, - "loss": 0.0695, - "step": 3141 - }, - { - "epoch": 8.31, - "learning_rate": 8.552278820375336e-06, - "loss": 0.1764, - "step": 3142 - }, - { - "epoch": 8.31, - "learning_rate": 8.53887399463807e-06, - "loss": 0.0002, - "step": 3143 - }, - { - "epoch": 8.32, - "learning_rate": 8.525469168900804e-06, - "loss": 0.0004, - "step": 3144 - }, - { - "epoch": 8.32, - "learning_rate": 8.51206434316354e-06, - "loss": 0.0207, - "step": 3145 - }, - { - "epoch": 8.32, - "learning_rate": 8.498659517426274e-06, - "loss": 0.0003, - "step": 3146 - }, - { - "epoch": 8.33, - "learning_rate": 8.485254691689008e-06, - "loss": 0.1444, - "step": 3147 - }, - { - "epoch": 8.33, - "learning_rate": 8.471849865951743e-06, - "loss": 0.0006, - "step": 3148 - }, - { - "epoch": 8.33, - "learning_rate": 8.458445040214476e-06, - "loss": 0.0002, - "step": 3149 - }, - { - "epoch": 8.33, - "learning_rate": 8.445040214477212e-06, - "loss": 0.0003, - "step": 3150 - }, - { - "epoch": 8.34, - "learning_rate": 8.431635388739947e-06, - "loss": 0.0033, - "step": 3151 - }, - { - "epoch": 8.34, - "learning_rate": 8.418230563002682e-06, - "loss": 0.0001, - "step": 3152 - }, - { - "epoch": 8.34, - "learning_rate": 8.404825737265417e-06, - "loss": 0.0002, - "step": 3153 - }, - { - "epoch": 8.34, - "learning_rate": 8.39142091152815e-06, - "loss": 0.0003, - "step": 3154 - }, - { - "epoch": 8.35, - "learning_rate": 8.378016085790886e-06, - "loss": 0.0003, - "step": 3155 - }, - { - "epoch": 8.35, - "learning_rate": 8.36461126005362e-06, - "loss": 0.0002, - "step": 3156 - }, - { - "epoch": 8.35, - "learning_rate": 8.351206434316354e-06, - "loss": 0.0003, - "step": 3157 - }, - { - "epoch": 8.35, - "learning_rate": 8.33780160857909e-06, - "loss": 0.0022, - "step": 3158 - }, - { - "epoch": 8.36, - "learning_rate": 8.324396782841823e-06, - "loss": 0.0094, - "step": 3159 - }, - { - "epoch": 8.36, - "learning_rate": 8.310991957104558e-06, - "loss": 0.039, - "step": 3160 - }, - { - "epoch": 8.36, - "learning_rate": 8.297587131367293e-06, - "loss": 0.0623, - "step": 3161 - }, - { - "epoch": 8.37, - "learning_rate": 8.284182305630026e-06, - "loss": 0.0269, - "step": 3162 - }, - { - "epoch": 8.37, - "learning_rate": 8.270777479892762e-06, - "loss": 0.2292, - "step": 3163 - }, - { - "epoch": 8.37, - "learning_rate": 8.257372654155495e-06, - "loss": 0.0814, - "step": 3164 - }, - { - "epoch": 8.37, - "learning_rate": 8.24396782841823e-06, - "loss": 0.0002, - "step": 3165 - }, - { - "epoch": 8.38, - "learning_rate": 8.230563002680965e-06, - "loss": 0.0006, - "step": 3166 - }, - { - "epoch": 8.38, - "learning_rate": 8.2171581769437e-06, - "loss": 0.0405, - "step": 3167 - }, - { - "epoch": 8.38, - "learning_rate": 8.203753351206435e-06, - "loss": 0.3745, - "step": 3168 - }, - { - "epoch": 8.38, - "learning_rate": 8.190348525469169e-06, - "loss": 0.0002, - "step": 3169 - }, - { - "epoch": 8.39, - "learning_rate": 8.176943699731904e-06, - "loss": 0.0007, - "step": 3170 - }, - { - "epoch": 8.39, - "learning_rate": 8.16353887399464e-06, - "loss": 0.0054, - "step": 3171 - }, - { - "epoch": 8.39, - "learning_rate": 8.150134048257373e-06, - "loss": 0.0027, - "step": 3172 - }, - { - "epoch": 8.39, - "learning_rate": 8.136729222520108e-06, - "loss": 0.0823, - "step": 3173 - }, - { - "epoch": 8.4, - "learning_rate": 8.123324396782843e-06, - "loss": 0.2821, - "step": 3174 - }, - { - "epoch": 8.4, - "learning_rate": 8.109919571045576e-06, - "loss": 0.2796, - "step": 3175 - }, - { - "epoch": 8.4, - "learning_rate": 8.096514745308311e-06, - "loss": 0.0004, - "step": 3176 - }, - { - "epoch": 8.4, - "learning_rate": 8.083109919571045e-06, - "loss": 0.0019, - "step": 3177 - }, - { - "epoch": 8.41, - "learning_rate": 8.06970509383378e-06, - "loss": 0.0003, - "step": 3178 - }, - { - "epoch": 8.41, - "learning_rate": 8.056300268096515e-06, - "loss": 0.0331, - "step": 3179 - }, - { - "epoch": 8.41, - "learning_rate": 8.04289544235925e-06, - "loss": 0.0002, - "step": 3180 - }, - { - "epoch": 8.42, - "learning_rate": 8.029490616621985e-06, - "loss": 0.002, - "step": 3181 - }, - { - "epoch": 8.42, - "learning_rate": 8.016085790884719e-06, - "loss": 0.0055, - "step": 3182 - }, - { - "epoch": 8.42, - "learning_rate": 8.002680965147454e-06, - "loss": 0.0002, - "step": 3183 - }, - { - "epoch": 8.42, - "learning_rate": 7.989276139410189e-06, - "loss": 0.0252, - "step": 3184 - }, - { - "epoch": 8.43, - "learning_rate": 7.975871313672923e-06, - "loss": 0.0005, - "step": 3185 - }, - { - "epoch": 8.43, - "learning_rate": 7.962466487935658e-06, - "loss": 0.0309, - "step": 3186 - }, - { - "epoch": 8.43, - "learning_rate": 7.949061662198391e-06, - "loss": 0.4315, - "step": 3187 - }, - { - "epoch": 8.43, - "learning_rate": 7.935656836461126e-06, - "loss": 0.0018, - "step": 3188 - }, - { - "epoch": 8.44, - "learning_rate": 7.922252010723861e-06, - "loss": 0.0973, - "step": 3189 - }, - { - "epoch": 8.44, - "learning_rate": 7.908847184986595e-06, - "loss": 0.0002, - "step": 3190 - }, - { - "epoch": 8.44, - "learning_rate": 7.89544235924933e-06, - "loss": 0.0123, - "step": 3191 - }, - { - "epoch": 8.44, - "learning_rate": 7.882037533512063e-06, - "loss": 0.0005, - "step": 3192 - }, - { - "epoch": 8.45, - "learning_rate": 7.868632707774798e-06, - "loss": 0.0002, - "step": 3193 - }, - { - "epoch": 8.45, - "learning_rate": 7.855227882037534e-06, - "loss": 0.002, - "step": 3194 - }, - { - "epoch": 8.45, - "learning_rate": 7.841823056300269e-06, - "loss": 0.0002, - "step": 3195 - }, - { - "epoch": 8.46, - "learning_rate": 7.828418230563004e-06, - "loss": 0.0006, - "step": 3196 - }, - { - "epoch": 8.46, - "learning_rate": 7.815013404825737e-06, - "loss": 0.0669, - "step": 3197 - }, - { - "epoch": 8.46, - "learning_rate": 7.801608579088472e-06, - "loss": 0.0002, - "step": 3198 - }, - { - "epoch": 8.46, - "learning_rate": 7.788203753351208e-06, - "loss": 0.0052, - "step": 3199 - }, - { - "epoch": 8.47, - "learning_rate": 7.774798927613941e-06, - "loss": 0.1126, - "step": 3200 - }, - { - "epoch": 8.47, - "learning_rate": 7.761394101876676e-06, - "loss": 0.0362, - "step": 3201 - }, - { - "epoch": 8.47, - "learning_rate": 7.74798927613941e-06, - "loss": 0.0002, - "step": 3202 - }, - { - "epoch": 8.47, - "learning_rate": 7.734584450402145e-06, - "loss": 0.1147, - "step": 3203 - }, - { - "epoch": 8.48, - "learning_rate": 7.72117962466488e-06, - "loss": 0.0002, - "step": 3204 - }, - { - "epoch": 8.48, - "learning_rate": 7.707774798927613e-06, - "loss": 0.323, - "step": 3205 - }, - { - "epoch": 8.48, - "learning_rate": 7.694369973190348e-06, - "loss": 0.0019, - "step": 3206 - }, - { - "epoch": 8.48, - "learning_rate": 7.680965147453084e-06, - "loss": 0.0002, - "step": 3207 - }, - { - "epoch": 8.49, - "learning_rate": 7.667560321715819e-06, - "loss": 0.0003, - "step": 3208 - }, - { - "epoch": 8.49, - "learning_rate": 7.654155495978554e-06, - "loss": 0.0313, - "step": 3209 - }, - { - "epoch": 8.49, - "learning_rate": 7.640750670241287e-06, - "loss": 0.0002, - "step": 3210 - }, - { - "epoch": 8.49, - "learning_rate": 7.6273458445040215e-06, - "loss": 0.0011, - "step": 3211 - }, - { - "epoch": 8.5, - "learning_rate": 7.613941018766757e-06, - "loss": 0.0007, - "step": 3212 - }, - { - "epoch": 8.5, - "learning_rate": 7.600536193029491e-06, - "loss": 0.0003, - "step": 3213 - }, - { - "epoch": 8.5, - "learning_rate": 7.587131367292226e-06, - "loss": 0.0002, - "step": 3214 - }, - { - "epoch": 8.51, - "learning_rate": 7.5737265415549595e-06, - "loss": 0.0002, - "step": 3215 - }, - { - "epoch": 8.51, - "learning_rate": 7.560321715817695e-06, - "loss": 0.0002, - "step": 3216 - }, - { - "epoch": 8.51, - "learning_rate": 7.54691689008043e-06, - "loss": 0.029, - "step": 3217 - }, - { - "epoch": 8.51, - "learning_rate": 7.533512064343164e-06, - "loss": 0.0009, - "step": 3218 - }, - { - "epoch": 8.52, - "learning_rate": 7.520107238605899e-06, - "loss": 0.3479, - "step": 3219 - }, - { - "epoch": 8.52, - "learning_rate": 7.506702412868633e-06, - "loss": 0.0002, - "step": 3220 - }, - { - "epoch": 8.52, - "learning_rate": 7.493297587131368e-06, - "loss": 0.0013, - "step": 3221 - }, - { - "epoch": 8.52, - "learning_rate": 7.479892761394103e-06, - "loss": 0.0263, - "step": 3222 - }, - { - "epoch": 8.53, - "learning_rate": 7.466487935656836e-06, - "loss": 0.0676, - "step": 3223 - }, - { - "epoch": 8.53, - "learning_rate": 7.4530831099195715e-06, - "loss": 0.0002, - "step": 3224 - }, - { - "epoch": 8.53, - "learning_rate": 7.439678284182306e-06, - "loss": 0.0002, - "step": 3225 - }, - { - "epoch": 8.53, - "learning_rate": 7.426273458445041e-06, - "loss": 0.0007, - "step": 3226 - }, - { - "epoch": 8.54, - "learning_rate": 7.412868632707776e-06, - "loss": 0.1148, - "step": 3227 - }, - { - "epoch": 8.54, - "learning_rate": 7.3994638069705094e-06, - "loss": 0.0002, - "step": 3228 - }, - { - "epoch": 8.54, - "learning_rate": 7.3860589812332446e-06, - "loss": 0.0929, - "step": 3229 - }, - { - "epoch": 8.54, - "learning_rate": 7.372654155495978e-06, - "loss": 0.0002, - "step": 3230 - }, - { - "epoch": 8.55, - "learning_rate": 7.359249329758713e-06, - "loss": 0.0282, - "step": 3231 - }, - { - "epoch": 8.55, - "learning_rate": 7.345844504021448e-06, - "loss": 0.1421, - "step": 3232 - }, - { - "epoch": 8.55, - "learning_rate": 7.3324396782841825e-06, - "loss": 0.0002, - "step": 3233 - }, - { - "epoch": 8.56, - "learning_rate": 7.319034852546918e-06, - "loss": 0.0175, - "step": 3234 - }, - { - "epoch": 8.56, - "learning_rate": 7.305630026809651e-06, - "loss": 0.0763, - "step": 3235 - }, - { - "epoch": 8.56, - "learning_rate": 7.292225201072386e-06, - "loss": 0.0003, - "step": 3236 - }, - { - "epoch": 8.56, - "learning_rate": 7.278820375335121e-06, - "loss": 0.0522, - "step": 3237 - }, - { - "epoch": 8.57, - "learning_rate": 7.265415549597855e-06, - "loss": 0.0264, - "step": 3238 - }, - { - "epoch": 8.57, - "learning_rate": 7.25201072386059e-06, - "loss": 0.0007, - "step": 3239 - }, - { - "epoch": 8.57, - "learning_rate": 7.238605898123325e-06, - "loss": 0.0002, - "step": 3240 - }, - { - "epoch": 8.57, - "learning_rate": 7.225201072386059e-06, - "loss": 0.0433, - "step": 3241 - }, - { - "epoch": 8.58, - "learning_rate": 7.2117962466487945e-06, - "loss": 0.0897, - "step": 3242 - }, - { - "epoch": 8.58, - "learning_rate": 7.198391420911528e-06, - "loss": 0.0601, - "step": 3243 - }, - { - "epoch": 8.58, - "learning_rate": 7.184986595174263e-06, - "loss": 0.0084, - "step": 3244 - }, - { - "epoch": 8.58, - "learning_rate": 7.171581769436998e-06, - "loss": 0.0224, - "step": 3245 - }, - { - "epoch": 8.59, - "learning_rate": 7.158176943699732e-06, - "loss": 0.0022, - "step": 3246 - }, - { - "epoch": 8.59, - "learning_rate": 7.144772117962467e-06, - "loss": 0.0019, - "step": 3247 - }, - { - "epoch": 8.59, - "learning_rate": 7.131367292225201e-06, - "loss": 0.0003, - "step": 3248 - }, - { - "epoch": 8.6, - "learning_rate": 7.117962466487936e-06, - "loss": 0.0002, - "step": 3249 - }, - { - "epoch": 8.6, - "learning_rate": 7.104557640750671e-06, - "loss": 0.0003, - "step": 3250 - }, - { - "epoch": 8.6, - "learning_rate": 7.091152815013405e-06, - "loss": 0.1219, - "step": 3251 - }, - { - "epoch": 8.6, - "learning_rate": 7.07774798927614e-06, - "loss": 0.0246, - "step": 3252 - }, - { - "epoch": 8.61, - "learning_rate": 7.064343163538874e-06, - "loss": 0.0005, - "step": 3253 - }, - { - "epoch": 8.61, - "learning_rate": 7.050938337801609e-06, - "loss": 0.0004, - "step": 3254 - }, - { - "epoch": 8.61, - "learning_rate": 7.037533512064344e-06, - "loss": 0.0002, - "step": 3255 - }, - { - "epoch": 8.61, - "learning_rate": 7.024128686327078e-06, - "loss": 0.0722, - "step": 3256 - }, - { - "epoch": 8.62, - "learning_rate": 7.010723860589813e-06, - "loss": 0.0006, - "step": 3257 - }, - { - "epoch": 8.62, - "learning_rate": 6.997319034852546e-06, - "loss": 0.0775, - "step": 3258 - }, - { - "epoch": 8.62, - "learning_rate": 6.9839142091152815e-06, - "loss": 0.1329, - "step": 3259 - }, - { - "epoch": 8.62, - "learning_rate": 6.970509383378017e-06, - "loss": 0.0318, - "step": 3260 - }, - { - "epoch": 8.63, - "learning_rate": 6.957104557640751e-06, - "loss": 0.0026, - "step": 3261 - }, - { - "epoch": 8.63, - "learning_rate": 6.943699731903486e-06, - "loss": 0.0119, - "step": 3262 - }, - { - "epoch": 8.63, - "learning_rate": 6.9302949061662195e-06, - "loss": 0.0194, - "step": 3263 - }, - { - "epoch": 8.63, - "learning_rate": 6.916890080428955e-06, - "loss": 0.1102, - "step": 3264 - }, - { - "epoch": 8.64, - "learning_rate": 6.90348525469169e-06, - "loss": 0.0002, - "step": 3265 - }, - { - "epoch": 8.64, - "learning_rate": 6.890080428954423e-06, - "loss": 0.0322, - "step": 3266 - }, - { - "epoch": 8.64, - "learning_rate": 6.876675603217158e-06, - "loss": 0.0083, - "step": 3267 - }, - { - "epoch": 8.65, - "learning_rate": 6.8632707774798935e-06, - "loss": 0.0947, - "step": 3268 - }, - { - "epoch": 8.65, - "learning_rate": 6.849865951742628e-06, - "loss": 0.0002, - "step": 3269 - }, - { - "epoch": 8.65, - "learning_rate": 6.836461126005363e-06, - "loss": 0.0039, - "step": 3270 - }, - { - "epoch": 8.65, - "learning_rate": 6.823056300268096e-06, - "loss": 0.106, - "step": 3271 - }, - { - "epoch": 8.66, - "learning_rate": 6.8096514745308315e-06, - "loss": 0.0107, - "step": 3272 - }, - { - "epoch": 8.66, - "learning_rate": 6.796246648793567e-06, - "loss": 0.0005, - "step": 3273 - }, - { - "epoch": 8.66, - "learning_rate": 6.7828418230563e-06, - "loss": 0.0066, - "step": 3274 - }, - { - "epoch": 8.66, - "learning_rate": 6.769436997319035e-06, - "loss": 0.0003, - "step": 3275 - }, - { - "epoch": 8.67, - "learning_rate": 6.7560321715817694e-06, - "loss": 0.0002, - "step": 3276 - }, - { - "epoch": 8.67, - "learning_rate": 6.742627345844505e-06, - "loss": 0.0008, - "step": 3277 - }, - { - "epoch": 8.67, - "learning_rate": 6.72922252010724e-06, - "loss": 0.0002, - "step": 3278 - }, - { - "epoch": 8.67, - "learning_rate": 6.715817694369973e-06, - "loss": 0.0002, - "step": 3279 - }, - { - "epoch": 8.68, - "learning_rate": 6.702412868632708e-06, - "loss": 0.0472, - "step": 3280 - }, - { - "epoch": 8.68, - "learning_rate": 6.6890080428954426e-06, - "loss": 0.0003, - "step": 3281 - }, - { - "epoch": 8.68, - "learning_rate": 6.675603217158178e-06, - "loss": 0.196, - "step": 3282 - }, - { - "epoch": 8.69, - "learning_rate": 6.662198391420913e-06, - "loss": 0.0351, - "step": 3283 - }, - { - "epoch": 8.69, - "learning_rate": 6.648793565683646e-06, - "loss": 0.0002, - "step": 3284 - }, - { - "epoch": 8.69, - "learning_rate": 6.635388739946381e-06, - "loss": 0.1151, - "step": 3285 - }, - { - "epoch": 8.69, - "learning_rate": 6.621983914209115e-06, - "loss": 0.0057, - "step": 3286 - }, - { - "epoch": 8.7, - "learning_rate": 6.60857908847185e-06, - "loss": 0.0002, - "step": 3287 - }, - { - "epoch": 8.7, - "learning_rate": 6.595174262734585e-06, - "loss": 0.0002, - "step": 3288 - }, - { - "epoch": 8.7, - "learning_rate": 6.581769436997319e-06, - "loss": 0.0774, - "step": 3289 - }, - { - "epoch": 8.7, - "learning_rate": 6.5683646112600545e-06, - "loss": 0.0004, - "step": 3290 - }, - { - "epoch": 8.71, - "learning_rate": 6.554959785522788e-06, - "loss": 0.4666, - "step": 3291 - }, - { - "epoch": 8.71, - "learning_rate": 6.541554959785523e-06, - "loss": 0.0003, - "step": 3292 - }, - { - "epoch": 8.71, - "learning_rate": 6.528150134048258e-06, - "loss": 0.0002, - "step": 3293 - }, - { - "epoch": 8.71, - "learning_rate": 6.514745308310992e-06, - "loss": 0.003, - "step": 3294 - }, - { - "epoch": 8.72, - "learning_rate": 6.501340482573727e-06, - "loss": 0.0009, - "step": 3295 - }, - { - "epoch": 8.72, - "learning_rate": 6.487935656836461e-06, - "loss": 0.0255, - "step": 3296 - }, - { - "epoch": 8.72, - "learning_rate": 6.474530831099196e-06, - "loss": 0.026, - "step": 3297 - }, - { - "epoch": 8.72, - "learning_rate": 6.461126005361931e-06, - "loss": 0.0011, - "step": 3298 - }, - { - "epoch": 8.73, - "learning_rate": 6.447721179624665e-06, - "loss": 0.0003, - "step": 3299 - }, - { - "epoch": 8.73, - "learning_rate": 6.4343163538874e-06, - "loss": 0.0001, - "step": 3300 - }, - { - "epoch": 8.73, - "learning_rate": 6.420911528150135e-06, - "loss": 0.0002, - "step": 3301 - }, - { - "epoch": 8.74, - "learning_rate": 6.4075067024128684e-06, - "loss": 0.0374, - "step": 3302 - }, - { - "epoch": 8.74, - "learning_rate": 6.3941018766756036e-06, - "loss": 0.0003, - "step": 3303 - }, - { - "epoch": 8.74, - "learning_rate": 6.380697050938338e-06, - "loss": 0.0003, - "step": 3304 - }, - { - "epoch": 8.74, - "learning_rate": 6.367292225201073e-06, - "loss": 0.0003, - "step": 3305 - }, - { - "epoch": 8.75, - "learning_rate": 6.353887399463808e-06, - "loss": 0.0002, - "step": 3306 - }, - { - "epoch": 8.75, - "learning_rate": 6.3404825737265416e-06, - "loss": 0.0002, - "step": 3307 - }, - { - "epoch": 8.75, - "learning_rate": 6.327077747989277e-06, - "loss": 0.0003, - "step": 3308 - }, - { - "epoch": 8.75, - "learning_rate": 6.31367292225201e-06, - "loss": 0.0002, - "step": 3309 - }, - { - "epoch": 8.76, - "learning_rate": 6.300268096514745e-06, - "loss": 0.0005, - "step": 3310 - }, - { - "epoch": 8.76, - "learning_rate": 6.28686327077748e-06, - "loss": 0.0003, - "step": 3311 - }, - { - "epoch": 8.76, - "learning_rate": 6.273458445040215e-06, - "loss": 0.0002, - "step": 3312 - }, - { - "epoch": 8.76, - "learning_rate": 6.26005361930295e-06, - "loss": 0.0848, - "step": 3313 - }, - { - "epoch": 8.77, - "learning_rate": 6.246648793565684e-06, - "loss": 0.0002, - "step": 3314 - }, - { - "epoch": 8.77, - "learning_rate": 6.233243967828418e-06, - "loss": 0.021, - "step": 3315 - }, - { - "epoch": 8.77, - "learning_rate": 6.219839142091153e-06, - "loss": 0.2761, - "step": 3316 - }, - { - "epoch": 8.78, - "learning_rate": 6.206434316353888e-06, - "loss": 0.0002, - "step": 3317 - }, - { - "epoch": 8.78, - "learning_rate": 6.193029490616623e-06, - "loss": 0.0309, - "step": 3318 - }, - { - "epoch": 8.78, - "learning_rate": 6.179624664879357e-06, - "loss": 0.0004, - "step": 3319 - }, - { - "epoch": 8.78, - "learning_rate": 6.1662198391420915e-06, - "loss": 0.0003, - "step": 3320 - }, - { - "epoch": 8.79, - "learning_rate": 6.152815013404826e-06, - "loss": 0.0059, - "step": 3321 - }, - { - "epoch": 8.79, - "learning_rate": 6.139410187667561e-06, - "loss": 0.0525, - "step": 3322 - }, - { - "epoch": 8.79, - "learning_rate": 6.126005361930295e-06, - "loss": 0.0002, - "step": 3323 - }, - { - "epoch": 8.79, - "learning_rate": 6.1126005361930295e-06, - "loss": 0.0002, - "step": 3324 - }, - { - "epoch": 8.8, - "learning_rate": 6.099195710455765e-06, - "loss": 0.0003, - "step": 3325 - }, - { - "epoch": 8.8, - "learning_rate": 6.085790884718499e-06, - "loss": 0.0026, - "step": 3326 - }, - { - "epoch": 8.8, - "learning_rate": 6.072386058981234e-06, - "loss": 0.0003, - "step": 3327 - }, - { - "epoch": 8.8, - "learning_rate": 6.058981233243968e-06, - "loss": 0.0137, - "step": 3328 - }, - { - "epoch": 8.81, - "learning_rate": 6.0455764075067026e-06, - "loss": 0.0003, - "step": 3329 - }, - { - "epoch": 8.81, - "learning_rate": 6.032171581769437e-06, - "loss": 0.0002, - "step": 3330 - }, - { - "epoch": 8.81, - "learning_rate": 6.018766756032172e-06, - "loss": 0.0003, - "step": 3331 - }, - { - "epoch": 8.81, - "learning_rate": 6.005361930294907e-06, - "loss": 0.0003, - "step": 3332 - }, - { - "epoch": 8.82, - "learning_rate": 5.991957104557641e-06, - "loss": 0.5064, - "step": 3333 - }, - { - "epoch": 8.82, - "learning_rate": 5.978552278820376e-06, - "loss": 0.0003, - "step": 3334 - }, - { - "epoch": 8.82, - "learning_rate": 5.96514745308311e-06, - "loss": 0.0064, - "step": 3335 - }, - { - "epoch": 8.83, - "learning_rate": 5.951742627345844e-06, - "loss": 0.0001, - "step": 3336 - }, - { - "epoch": 8.83, - "learning_rate": 5.938337801608579e-06, - "loss": 0.0003, - "step": 3337 - }, - { - "epoch": 8.83, - "learning_rate": 5.924932975871314e-06, - "loss": 0.0002, - "step": 3338 - }, - { - "epoch": 8.83, - "learning_rate": 5.911528150134049e-06, - "loss": 0.0004, - "step": 3339 - }, - { - "epoch": 8.84, - "learning_rate": 5.898123324396783e-06, - "loss": 0.0005, - "step": 3340 - }, - { - "epoch": 8.84, - "learning_rate": 5.884718498659518e-06, - "loss": 0.1297, - "step": 3341 - }, - { - "epoch": 8.84, - "learning_rate": 5.8713136729222525e-06, - "loss": 0.0324, - "step": 3342 - }, - { - "epoch": 8.84, - "learning_rate": 5.857908847184987e-06, - "loss": 0.0208, - "step": 3343 - }, - { - "epoch": 8.85, - "learning_rate": 5.844504021447721e-06, - "loss": 0.065, - "step": 3344 - }, - { - "epoch": 8.85, - "learning_rate": 5.831099195710455e-06, - "loss": 0.0323, - "step": 3345 - }, - { - "epoch": 8.85, - "learning_rate": 5.8176943699731905e-06, - "loss": 0.1872, - "step": 3346 - }, - { - "epoch": 8.85, - "learning_rate": 5.804289544235926e-06, - "loss": 0.0251, - "step": 3347 - }, - { - "epoch": 8.86, - "learning_rate": 5.79088471849866e-06, - "loss": 0.0002, - "step": 3348 - }, - { - "epoch": 8.86, - "learning_rate": 5.777479892761394e-06, - "loss": 0.1384, - "step": 3349 - }, - { - "epoch": 8.86, - "learning_rate": 5.7640750670241285e-06, - "loss": 0.0006, - "step": 3350 - }, - { - "epoch": 8.87, - "learning_rate": 5.750670241286864e-06, - "loss": 0.0004, - "step": 3351 - }, - { - "epoch": 8.87, - "learning_rate": 5.737265415549598e-06, - "loss": 0.0121, - "step": 3352 - }, - { - "epoch": 8.87, - "learning_rate": 5.723860589812333e-06, - "loss": 0.0007, - "step": 3353 - }, - { - "epoch": 8.87, - "learning_rate": 5.710455764075067e-06, - "loss": 0.0003, - "step": 3354 - }, - { - "epoch": 8.88, - "learning_rate": 5.697050938337802e-06, - "loss": 0.0003, - "step": 3355 - }, - { - "epoch": 8.88, - "learning_rate": 5.683646112600537e-06, - "loss": 0.009, - "step": 3356 - }, - { - "epoch": 8.88, - "learning_rate": 5.670241286863271e-06, - "loss": 0.0004, - "step": 3357 - }, - { - "epoch": 8.88, - "learning_rate": 5.656836461126005e-06, - "loss": 0.0009, - "step": 3358 - }, - { - "epoch": 8.89, - "learning_rate": 5.6434316353887395e-06, - "loss": 0.0005, - "step": 3359 - }, - { - "epoch": 8.89, - "learning_rate": 5.630026809651475e-06, - "loss": 0.0002, - "step": 3360 - }, - { - "epoch": 8.89, - "learning_rate": 5.61662198391421e-06, - "loss": 0.0319, - "step": 3361 - }, - { - "epoch": 8.89, - "learning_rate": 5.603217158176944e-06, - "loss": 0.0955, - "step": 3362 - }, - { - "epoch": 8.9, - "learning_rate": 5.589812332439678e-06, - "loss": 0.0706, - "step": 3363 - }, - { - "epoch": 8.9, - "learning_rate": 5.576407506702413e-06, - "loss": 0.0072, - "step": 3364 - }, - { - "epoch": 8.9, - "learning_rate": 5.563002680965148e-06, - "loss": 0.0002, - "step": 3365 - }, - { - "epoch": 8.9, - "learning_rate": 5.549597855227882e-06, - "loss": 0.0002, - "step": 3366 - }, - { - "epoch": 8.91, - "learning_rate": 5.536193029490617e-06, - "loss": 0.0018, - "step": 3367 - }, - { - "epoch": 8.91, - "learning_rate": 5.5227882037533515e-06, - "loss": 0.0002, - "step": 3368 - }, - { - "epoch": 8.91, - "learning_rate": 5.509383378016087e-06, - "loss": 0.0002, - "step": 3369 - }, - { - "epoch": 8.92, - "learning_rate": 5.495978552278821e-06, - "loss": 0.0001, - "step": 3370 - }, - { - "epoch": 8.92, - "learning_rate": 5.482573726541555e-06, - "loss": 0.0414, - "step": 3371 - }, - { - "epoch": 8.92, - "learning_rate": 5.4691689008042895e-06, - "loss": 0.0107, - "step": 3372 - }, - { - "epoch": 8.92, - "learning_rate": 5.455764075067024e-06, - "loss": 0.0252, - "step": 3373 - }, - { - "epoch": 8.93, - "learning_rate": 5.442359249329759e-06, - "loss": 0.0918, - "step": 3374 - }, - { - "epoch": 8.93, - "learning_rate": 5.428954423592494e-06, - "loss": 0.0016, - "step": 3375 - }, - { - "epoch": 8.93, - "learning_rate": 5.415549597855228e-06, - "loss": 0.0001, - "step": 3376 - }, - { - "epoch": 8.93, - "learning_rate": 5.402144772117963e-06, - "loss": 0.0002, - "step": 3377 - }, - { - "epoch": 8.94, - "learning_rate": 5.388739946380697e-06, - "loss": 0.0003, - "step": 3378 - }, - { - "epoch": 8.94, - "learning_rate": 5.375335120643432e-06, - "loss": 0.0002, - "step": 3379 - }, - { - "epoch": 8.94, - "learning_rate": 5.361930294906166e-06, - "loss": 0.0722, - "step": 3380 - }, - { - "epoch": 8.94, - "learning_rate": 5.348525469168901e-06, - "loss": 0.0002, - "step": 3381 - }, - { - "epoch": 8.95, - "learning_rate": 5.335120643431636e-06, - "loss": 0.0002, - "step": 3382 - }, - { - "epoch": 8.95, - "learning_rate": 5.32171581769437e-06, - "loss": 0.0001, - "step": 3383 - }, - { - "epoch": 8.95, - "learning_rate": 5.308310991957105e-06, - "loss": 0.0002, - "step": 3384 - }, - { - "epoch": 8.96, - "learning_rate": 5.294906166219839e-06, - "loss": 0.1, - "step": 3385 - }, - { - "epoch": 8.96, - "learning_rate": 5.281501340482574e-06, - "loss": 0.1151, - "step": 3386 - }, - { - "epoch": 8.96, - "learning_rate": 5.268096514745308e-06, - "loss": 0.0003, - "step": 3387 - }, - { - "epoch": 8.96, - "learning_rate": 5.254691689008043e-06, - "loss": 0.0001, - "step": 3388 - }, - { - "epoch": 8.97, - "learning_rate": 5.241286863270778e-06, - "loss": 0.0003, - "step": 3389 - }, - { - "epoch": 8.97, - "learning_rate": 5.2278820375335125e-06, - "loss": 0.0002, - "step": 3390 - }, - { - "epoch": 8.97, - "learning_rate": 5.214477211796247e-06, - "loss": 0.0896, - "step": 3391 - }, - { - "epoch": 8.97, - "learning_rate": 5.201072386058981e-06, - "loss": 0.0002, - "step": 3392 - }, - { - "epoch": 8.98, - "learning_rate": 5.187667560321716e-06, - "loss": 0.0003, - "step": 3393 - }, - { - "epoch": 8.98, - "learning_rate": 5.1742627345844505e-06, - "loss": 0.0003, - "step": 3394 - }, - { - "epoch": 8.98, - "learning_rate": 5.160857908847186e-06, - "loss": 0.0008, - "step": 3395 - }, - { - "epoch": 8.98, - "learning_rate": 5.14745308310992e-06, - "loss": 0.4041, - "step": 3396 - }, - { - "epoch": 8.99, - "learning_rate": 5.134048257372654e-06, - "loss": 0.0253, - "step": 3397 - }, - { - "epoch": 8.99, - "learning_rate": 5.120643431635389e-06, - "loss": 0.0355, - "step": 3398 - }, - { - "epoch": 8.99, - "learning_rate": 5.107238605898124e-06, - "loss": 0.0771, - "step": 3399 - }, - { - "epoch": 8.99, - "learning_rate": 5.093833780160858e-06, - "loss": 0.2133, - "step": 3400 - }, - { - "epoch": 9.0, - "learning_rate": 5.080428954423592e-06, - "loss": 0.0002, - "step": 3401 - }, - { - "epoch": 9.0, - "learning_rate": 5.067024128686327e-06, - "loss": 0.0022, - "step": 3402 - }, - { - "epoch": 9.0, - "eval_f1": 0.774885145482389, - "eval_loss": 1.3973581790924072, - "eval_runtime": 1.8849, - "eval_samples_per_second": 802.683, - "eval_steps_per_second": 50.4, - "step": 3402 - }, - { - "epoch": 9.0, - "learning_rate": 5.0536193029490624e-06, - "loss": 0.0002, - "step": 3403 - }, - { - "epoch": 9.01, - "learning_rate": 5.040214477211797e-06, - "loss": 0.1601, - "step": 3404 - }, - { - "epoch": 9.01, - "learning_rate": 5.026809651474531e-06, - "loss": 0.0013, - "step": 3405 - }, - { - "epoch": 9.01, - "learning_rate": 5.013404825737265e-06, - "loss": 0.0142, - "step": 3406 - }, - { - "epoch": 9.01, - "learning_rate": 5e-06, - "loss": 0.0003, - "step": 3407 - }, - { - "epoch": 9.02, - "learning_rate": 4.986595174262735e-06, - "loss": 0.0005, - "step": 3408 - }, - { - "epoch": 9.02, - "learning_rate": 4.973190348525469e-06, - "loss": 0.0002, - "step": 3409 - }, - { - "epoch": 9.02, - "learning_rate": 4.959785522788204e-06, - "loss": 0.0002, - "step": 3410 - }, - { - "epoch": 9.02, - "learning_rate": 4.946380697050938e-06, - "loss": 0.0004, - "step": 3411 - }, - { - "epoch": 9.03, - "learning_rate": 4.9329758713136735e-06, - "loss": 0.0002, - "step": 3412 - }, - { - "epoch": 9.03, - "learning_rate": 4.919571045576408e-06, - "loss": 0.0053, - "step": 3413 - }, - { - "epoch": 9.03, - "learning_rate": 4.906166219839142e-06, - "loss": 0.0005, - "step": 3414 - }, - { - "epoch": 9.03, - "learning_rate": 4.892761394101876e-06, - "loss": 0.0001, - "step": 3415 - }, - { - "epoch": 9.04, - "learning_rate": 4.8793565683646115e-06, - "loss": 0.0003, - "step": 3416 - }, - { - "epoch": 9.04, - "learning_rate": 4.865951742627347e-06, - "loss": 0.0002, - "step": 3417 - }, - { - "epoch": 9.04, - "learning_rate": 4.852546916890081e-06, - "loss": 0.0002, - "step": 3418 - }, - { - "epoch": 9.04, - "learning_rate": 4.839142091152815e-06, - "loss": 0.0004, - "step": 3419 - }, - { - "epoch": 9.05, - "learning_rate": 4.8257372654155495e-06, - "loss": 0.0002, - "step": 3420 - }, - { - "epoch": 9.05, - "learning_rate": 4.812332439678285e-06, - "loss": 0.0036, - "step": 3421 - }, - { - "epoch": 9.05, - "learning_rate": 4.798927613941019e-06, - "loss": 0.0002, - "step": 3422 - }, - { - "epoch": 9.06, - "learning_rate": 4.785522788203753e-06, - "loss": 0.0002, - "step": 3423 - }, - { - "epoch": 9.06, - "learning_rate": 4.772117962466488e-06, - "loss": 0.0002, - "step": 3424 - }, - { - "epoch": 9.06, - "learning_rate": 4.758713136729223e-06, - "loss": 0.0002, - "step": 3425 - }, - { - "epoch": 9.06, - "learning_rate": 4.745308310991958e-06, - "loss": 0.0002, - "step": 3426 - }, - { - "epoch": 9.07, - "learning_rate": 4.731903485254692e-06, - "loss": 0.0001, - "step": 3427 - }, - { - "epoch": 9.07, - "learning_rate": 4.718498659517426e-06, - "loss": 0.0004, - "step": 3428 - }, - { - "epoch": 9.07, - "learning_rate": 4.705093833780161e-06, - "loss": 0.0002, - "step": 3429 - }, - { - "epoch": 9.07, - "learning_rate": 4.691689008042896e-06, - "loss": 0.0002, - "step": 3430 - }, - { - "epoch": 9.08, - "learning_rate": 4.678284182305631e-06, - "loss": 0.0002, - "step": 3431 - }, - { - "epoch": 9.08, - "learning_rate": 4.664879356568365e-06, - "loss": 0.0568, - "step": 3432 - }, - { - "epoch": 9.08, - "learning_rate": 4.651474530831099e-06, - "loss": 0.0002, - "step": 3433 - }, - { - "epoch": 9.08, - "learning_rate": 4.638069705093834e-06, - "loss": 0.0002, - "step": 3434 - }, - { - "epoch": 9.09, - "learning_rate": 4.624664879356569e-06, - "loss": 0.0002, - "step": 3435 - }, - { - "epoch": 9.09, - "learning_rate": 4.611260053619303e-06, - "loss": 0.0019, - "step": 3436 - }, - { - "epoch": 9.09, - "learning_rate": 4.597855227882037e-06, - "loss": 0.0002, - "step": 3437 - }, - { - "epoch": 9.1, - "learning_rate": 4.5844504021447725e-06, - "loss": 0.0001, - "step": 3438 - }, - { - "epoch": 9.1, - "learning_rate": 4.571045576407507e-06, - "loss": 0.0002, - "step": 3439 - }, - { - "epoch": 9.1, - "learning_rate": 4.557640750670242e-06, - "loss": 0.03, - "step": 3440 - }, - { - "epoch": 9.1, - "learning_rate": 4.544235924932976e-06, - "loss": 0.042, - "step": 3441 - }, - { - "epoch": 9.11, - "learning_rate": 4.5308310991957105e-06, - "loss": 0.2888, - "step": 3442 - }, - { - "epoch": 9.11, - "learning_rate": 4.517426273458445e-06, - "loss": 0.0561, - "step": 3443 - }, - { - "epoch": 9.11, - "learning_rate": 4.50402144772118e-06, - "loss": 0.0227, - "step": 3444 - }, - { - "epoch": 9.11, - "learning_rate": 4.490616621983915e-06, - "loss": 0.0004, - "step": 3445 - }, - { - "epoch": 9.12, - "learning_rate": 4.477211796246649e-06, - "loss": 0.0833, - "step": 3446 - }, - { - "epoch": 9.12, - "learning_rate": 4.463806970509384e-06, - "loss": 0.0002, - "step": 3447 - }, - { - "epoch": 9.12, - "learning_rate": 4.450402144772118e-06, - "loss": 0.0268, - "step": 3448 - }, - { - "epoch": 9.12, - "learning_rate": 4.436997319034853e-06, - "loss": 0.0002, - "step": 3449 - }, - { - "epoch": 9.13, - "learning_rate": 4.423592493297587e-06, - "loss": 0.0246, - "step": 3450 - }, - { - "epoch": 9.13, - "learning_rate": 4.410187667560322e-06, - "loss": 0.0002, - "step": 3451 - }, - { - "epoch": 9.13, - "learning_rate": 4.396782841823057e-06, - "loss": 0.0002, - "step": 3452 - }, - { - "epoch": 9.13, - "learning_rate": 4.383378016085791e-06, - "loss": 0.002, - "step": 3453 - }, - { - "epoch": 9.14, - "learning_rate": 4.369973190348526e-06, - "loss": 0.0165, - "step": 3454 - }, - { - "epoch": 9.14, - "learning_rate": 4.35656836461126e-06, - "loss": 0.0398, - "step": 3455 - }, - { - "epoch": 9.14, - "learning_rate": 4.343163538873995e-06, - "loss": 0.023, - "step": 3456 - }, - { - "epoch": 9.15, - "learning_rate": 4.329758713136729e-06, - "loss": 0.0002, - "step": 3457 - }, - { - "epoch": 9.15, - "learning_rate": 4.316353887399464e-06, - "loss": 0.0024, - "step": 3458 - }, - { - "epoch": 9.15, - "learning_rate": 4.302949061662199e-06, - "loss": 0.0002, - "step": 3459 - }, - { - "epoch": 9.15, - "learning_rate": 4.2895442359249335e-06, - "loss": 0.0012, - "step": 3460 - }, - { - "epoch": 9.16, - "learning_rate": 4.276139410187668e-06, - "loss": 0.0016, - "step": 3461 - }, - { - "epoch": 9.16, - "learning_rate": 4.262734584450402e-06, - "loss": 0.0237, - "step": 3462 - }, - { - "epoch": 9.16, - "learning_rate": 4.249329758713137e-06, - "loss": 0.0002, - "step": 3463 - }, - { - "epoch": 9.16, - "learning_rate": 4.2359249329758715e-06, - "loss": 0.071, - "step": 3464 - }, - { - "epoch": 9.17, - "learning_rate": 4.222520107238606e-06, - "loss": 0.0006, - "step": 3465 - }, - { - "epoch": 9.17, - "learning_rate": 4.209115281501341e-06, - "loss": 0.0001, - "step": 3466 - }, - { - "epoch": 9.17, - "learning_rate": 4.195710455764075e-06, - "loss": 0.0002, - "step": 3467 - }, - { - "epoch": 9.17, - "learning_rate": 4.18230563002681e-06, - "loss": 0.0329, - "step": 3468 - }, - { - "epoch": 9.18, - "learning_rate": 4.168900804289545e-06, - "loss": 0.0796, - "step": 3469 - }, - { - "epoch": 9.18, - "learning_rate": 4.155495978552279e-06, - "loss": 0.0002, - "step": 3470 - }, - { - "epoch": 9.18, - "learning_rate": 4.142091152815013e-06, - "loss": 0.0724, - "step": 3471 - }, - { - "epoch": 9.19, - "learning_rate": 4.1286863270777475e-06, - "loss": 0.0002, - "step": 3472 - }, - { - "epoch": 9.19, - "learning_rate": 4.115281501340483e-06, - "loss": 0.0107, - "step": 3473 - }, - { - "epoch": 9.19, - "learning_rate": 4.101876675603218e-06, - "loss": 0.0004, - "step": 3474 - }, - { - "epoch": 9.19, - "learning_rate": 4.088471849865952e-06, - "loss": 0.0019, - "step": 3475 - }, - { - "epoch": 9.2, - "learning_rate": 4.075067024128686e-06, - "loss": 0.0002, - "step": 3476 - }, - { - "epoch": 9.2, - "learning_rate": 4.0616621983914214e-06, - "loss": 0.0002, - "step": 3477 - }, - { - "epoch": 9.2, - "learning_rate": 4.048257372654156e-06, - "loss": 0.0002, - "step": 3478 - }, - { - "epoch": 9.2, - "learning_rate": 4.03485254691689e-06, - "loss": 0.0197, - "step": 3479 - }, - { - "epoch": 9.21, - "learning_rate": 4.021447721179625e-06, - "loss": 0.0002, - "step": 3480 - }, - { - "epoch": 9.21, - "learning_rate": 4.008042895442359e-06, - "loss": 0.032, - "step": 3481 - }, - { - "epoch": 9.21, - "learning_rate": 3.9946380697050945e-06, - "loss": 0.0248, - "step": 3482 - }, - { - "epoch": 9.21, - "learning_rate": 3.981233243967829e-06, - "loss": 0.0002, - "step": 3483 - }, - { - "epoch": 9.22, - "learning_rate": 3.967828418230563e-06, - "loss": 0.0014, - "step": 3484 - }, - { - "epoch": 9.22, - "learning_rate": 3.954423592493297e-06, - "loss": 0.0305, - "step": 3485 - }, - { - "epoch": 9.22, - "learning_rate": 3.941018766756032e-06, - "loss": 0.0003, - "step": 3486 - }, - { - "epoch": 9.22, - "learning_rate": 3.927613941018767e-06, - "loss": 0.0054, - "step": 3487 - }, - { - "epoch": 9.23, - "learning_rate": 3.914209115281502e-06, - "loss": 0.0002, - "step": 3488 - }, - { - "epoch": 9.23, - "learning_rate": 3.900804289544236e-06, - "loss": 0.067, - "step": 3489 - }, - { - "epoch": 9.23, - "learning_rate": 3.8873994638069705e-06, - "loss": 0.0093, - "step": 3490 - }, - { - "epoch": 9.24, - "learning_rate": 3.873994638069705e-06, - "loss": 0.0125, - "step": 3491 - }, - { - "epoch": 9.24, - "learning_rate": 3.86058981233244e-06, - "loss": 0.0002, - "step": 3492 - }, - { - "epoch": 9.24, - "learning_rate": 3.847184986595174e-06, - "loss": 0.0001, - "step": 3493 - }, - { - "epoch": 9.24, - "learning_rate": 3.833780160857909e-06, - "loss": 0.266, - "step": 3494 - }, - { - "epoch": 9.25, - "learning_rate": 3.820375335120644e-06, - "loss": 0.2064, - "step": 3495 - }, - { - "epoch": 9.25, - "learning_rate": 3.8069705093833783e-06, - "loss": 0.021, - "step": 3496 - }, - { - "epoch": 9.25, - "learning_rate": 3.793565683646113e-06, - "loss": 0.0719, - "step": 3497 - }, - { - "epoch": 9.25, - "learning_rate": 3.7801608579088473e-06, - "loss": 0.0002, - "step": 3498 - }, - { - "epoch": 9.26, - "learning_rate": 3.766756032171582e-06, - "loss": 0.0061, - "step": 3499 - }, - { - "epoch": 9.26, - "learning_rate": 3.7533512064343163e-06, - "loss": 0.0481, - "step": 3500 - }, - { - "epoch": 9.26, - "learning_rate": 3.7399463806970514e-06, - "loss": 0.0007, - "step": 3501 - }, - { - "epoch": 9.26, - "learning_rate": 3.7265415549597857e-06, - "loss": 0.0123, - "step": 3502 - }, - { - "epoch": 9.27, - "learning_rate": 3.7131367292225204e-06, - "loss": 0.0002, - "step": 3503 - }, - { - "epoch": 9.27, - "learning_rate": 3.6997319034852547e-06, - "loss": 0.024, - "step": 3504 - }, - { - "epoch": 9.27, - "learning_rate": 3.686327077747989e-06, - "loss": 0.0002, - "step": 3505 - }, - { - "epoch": 9.28, - "learning_rate": 3.672922252010724e-06, - "loss": 0.0003, - "step": 3506 - }, - { - "epoch": 9.28, - "learning_rate": 3.659517426273459e-06, - "loss": 0.0579, - "step": 3507 - }, - { - "epoch": 9.28, - "learning_rate": 3.646112600536193e-06, - "loss": 0.0084, - "step": 3508 - }, - { - "epoch": 9.28, - "learning_rate": 3.6327077747989274e-06, - "loss": 0.0002, - "step": 3509 - }, - { - "epoch": 9.29, - "learning_rate": 3.6193029490616625e-06, - "loss": 0.0405, - "step": 3510 - }, - { - "epoch": 9.29, - "learning_rate": 3.6058981233243972e-06, - "loss": 0.0002, - "step": 3511 - }, - { - "epoch": 9.29, - "learning_rate": 3.5924932975871315e-06, - "loss": 0.0006, - "step": 3512 - }, - { - "epoch": 9.29, - "learning_rate": 3.579088471849866e-06, - "loss": 0.0002, - "step": 3513 - }, - { - "epoch": 9.3, - "learning_rate": 3.5656836461126005e-06, - "loss": 0.0002, - "step": 3514 - }, - { - "epoch": 9.3, - "learning_rate": 3.5522788203753356e-06, - "loss": 0.1122, - "step": 3515 - }, - { - "epoch": 9.3, - "learning_rate": 3.53887399463807e-06, - "loss": 0.2226, - "step": 3516 - }, - { - "epoch": 9.3, - "learning_rate": 3.5254691689008046e-06, - "loss": 0.0002, - "step": 3517 - }, - { - "epoch": 9.31, - "learning_rate": 3.512064343163539e-06, - "loss": 0.0001, - "step": 3518 - }, - { - "epoch": 9.31, - "learning_rate": 3.498659517426273e-06, - "loss": 0.0126, - "step": 3519 - }, - { - "epoch": 9.31, - "learning_rate": 3.4852546916890083e-06, - "loss": 0.0552, - "step": 3520 - }, - { - "epoch": 9.31, - "learning_rate": 3.471849865951743e-06, - "loss": 0.0002, - "step": 3521 - }, - { - "epoch": 9.32, - "learning_rate": 3.4584450402144773e-06, - "loss": 0.0001, - "step": 3522 - }, - { - "epoch": 9.32, - "learning_rate": 3.4450402144772116e-06, - "loss": 0.0002, - "step": 3523 - }, - { - "epoch": 9.32, - "learning_rate": 3.4316353887399467e-06, - "loss": 0.0001, - "step": 3524 - }, - { - "epoch": 9.33, - "learning_rate": 3.4182305630026814e-06, - "loss": 0.0027, - "step": 3525 - }, - { - "epoch": 9.33, - "learning_rate": 3.4048257372654157e-06, - "loss": 0.0275, - "step": 3526 - }, - { - "epoch": 9.33, - "learning_rate": 3.39142091152815e-06, - "loss": 0.0005, - "step": 3527 - }, - { - "epoch": 9.33, - "learning_rate": 3.3780160857908847e-06, - "loss": 0.0002, - "step": 3528 - }, - { - "epoch": 9.34, - "learning_rate": 3.36461126005362e-06, - "loss": 0.1674, - "step": 3529 - }, - { - "epoch": 9.34, - "learning_rate": 3.351206434316354e-06, - "loss": 0.0002, - "step": 3530 - }, - { - "epoch": 9.34, - "learning_rate": 3.337801608579089e-06, - "loss": 0.0004, - "step": 3531 - }, - { - "epoch": 9.34, - "learning_rate": 3.324396782841823e-06, - "loss": 0.0002, - "step": 3532 - }, - { - "epoch": 9.35, - "learning_rate": 3.3109919571045574e-06, - "loss": 0.0002, - "step": 3533 - }, - { - "epoch": 9.35, - "learning_rate": 3.2975871313672925e-06, - "loss": 0.0003, - "step": 3534 - }, - { - "epoch": 9.35, - "learning_rate": 3.2841823056300272e-06, - "loss": 0.0002, - "step": 3535 - }, - { - "epoch": 9.35, - "learning_rate": 3.2707774798927615e-06, - "loss": 0.0259, - "step": 3536 - }, - { - "epoch": 9.36, - "learning_rate": 3.257372654155496e-06, - "loss": 0.0276, - "step": 3537 - }, - { - "epoch": 9.36, - "learning_rate": 3.2439678284182305e-06, - "loss": 0.0004, - "step": 3538 - }, - { - "epoch": 9.36, - "learning_rate": 3.2305630026809657e-06, - "loss": 0.0423, - "step": 3539 - }, - { - "epoch": 9.37, - "learning_rate": 3.2171581769437e-06, - "loss": 0.0002, - "step": 3540 - }, - { - "epoch": 9.37, - "learning_rate": 3.2037533512064342e-06, - "loss": 0.0002, - "step": 3541 - }, - { - "epoch": 9.37, - "learning_rate": 3.190348525469169e-06, - "loss": 0.0004, - "step": 3542 - }, - { - "epoch": 9.37, - "learning_rate": 3.176943699731904e-06, - "loss": 0.0002, - "step": 3543 - }, - { - "epoch": 9.38, - "learning_rate": 3.1635388739946383e-06, - "loss": 0.2091, - "step": 3544 - }, - { - "epoch": 9.38, - "learning_rate": 3.1501340482573726e-06, - "loss": 0.0052, - "step": 3545 - }, - { - "epoch": 9.38, - "learning_rate": 3.1367292225201073e-06, - "loss": 0.0004, - "step": 3546 - }, - { - "epoch": 9.38, - "learning_rate": 3.123324396782842e-06, - "loss": 0.0002, - "step": 3547 - }, - { - "epoch": 9.39, - "learning_rate": 3.1099195710455763e-06, - "loss": 0.0002, - "step": 3548 - }, - { - "epoch": 9.39, - "learning_rate": 3.0965147453083115e-06, - "loss": 0.0002, - "step": 3549 - }, - { - "epoch": 9.39, - "learning_rate": 3.0831099195710457e-06, - "loss": 0.0002, - "step": 3550 - }, - { - "epoch": 9.39, - "learning_rate": 3.0697050938337804e-06, - "loss": 0.0179, - "step": 3551 - }, - { - "epoch": 9.4, - "learning_rate": 3.0563002680965147e-06, - "loss": 0.0002, - "step": 3552 - }, - { - "epoch": 9.4, - "learning_rate": 3.0428954423592494e-06, - "loss": 0.0001, - "step": 3553 - }, - { - "epoch": 9.4, - "learning_rate": 3.029490616621984e-06, - "loss": 0.0004, - "step": 3554 - }, - { - "epoch": 9.4, - "learning_rate": 3.0160857908847184e-06, - "loss": 0.0838, - "step": 3555 - }, - { - "epoch": 9.41, - "learning_rate": 3.0026809651474536e-06, - "loss": 0.0001, - "step": 3556 - }, - { - "epoch": 9.41, - "learning_rate": 2.989276139410188e-06, - "loss": 0.0004, - "step": 3557 - }, - { - "epoch": 9.41, - "learning_rate": 2.975871313672922e-06, - "loss": 0.0005, - "step": 3558 - }, - { - "epoch": 9.42, - "learning_rate": 2.962466487935657e-06, - "loss": 0.145, - "step": 3559 - }, - { - "epoch": 9.42, - "learning_rate": 2.9490616621983915e-06, - "loss": 0.0045, - "step": 3560 - }, - { - "epoch": 9.42, - "learning_rate": 2.9356568364611262e-06, - "loss": 0.2014, - "step": 3561 - }, - { - "epoch": 9.42, - "learning_rate": 2.9222520107238605e-06, - "loss": 0.0005, - "step": 3562 - }, - { - "epoch": 9.43, - "learning_rate": 2.9088471849865952e-06, - "loss": 0.0002, - "step": 3563 - }, - { - "epoch": 9.43, - "learning_rate": 2.89544235924933e-06, - "loss": 0.2962, - "step": 3564 - }, - { - "epoch": 9.43, - "learning_rate": 2.8820375335120642e-06, - "loss": 0.0253, - "step": 3565 - }, - { - "epoch": 9.43, - "learning_rate": 2.868632707774799e-06, - "loss": 0.0002, - "step": 3566 - }, - { - "epoch": 9.44, - "learning_rate": 2.8552278820375336e-06, - "loss": 0.0001, - "step": 3567 - }, - { - "epoch": 9.44, - "learning_rate": 2.8418230563002683e-06, - "loss": 0.0556, - "step": 3568 - }, - { - "epoch": 9.44, - "learning_rate": 2.8284182305630026e-06, - "loss": 0.0018, - "step": 3569 - }, - { - "epoch": 9.44, - "learning_rate": 2.8150134048257373e-06, - "loss": 0.0002, - "step": 3570 - }, - { - "epoch": 9.45, - "learning_rate": 2.801608579088472e-06, - "loss": 0.0001, - "step": 3571 - }, - { - "epoch": 9.45, - "learning_rate": 2.7882037533512063e-06, - "loss": 0.0001, - "step": 3572 - }, - { - "epoch": 9.45, - "learning_rate": 2.774798927613941e-06, - "loss": 0.0007, - "step": 3573 - }, - { - "epoch": 9.46, - "learning_rate": 2.7613941018766757e-06, - "loss": 0.0001, - "step": 3574 - }, - { - "epoch": 9.46, - "learning_rate": 2.7479892761394105e-06, - "loss": 0.0009, - "step": 3575 - }, - { - "epoch": 9.46, - "learning_rate": 2.7345844504021447e-06, - "loss": 0.0072, - "step": 3576 - }, - { - "epoch": 9.46, - "learning_rate": 2.7211796246648794e-06, - "loss": 0.0002, - "step": 3577 - }, - { - "epoch": 9.47, - "learning_rate": 2.707774798927614e-06, - "loss": 0.001, - "step": 3578 - }, - { - "epoch": 9.47, - "learning_rate": 2.6943699731903484e-06, - "loss": 0.0014, - "step": 3579 - }, - { - "epoch": 9.47, - "learning_rate": 2.680965147453083e-06, - "loss": 0.0001, - "step": 3580 - }, - { - "epoch": 9.47, - "learning_rate": 2.667560321715818e-06, - "loss": 0.0001, - "step": 3581 - }, - { - "epoch": 9.48, - "learning_rate": 2.6541554959785526e-06, - "loss": 0.0494, - "step": 3582 - }, - { - "epoch": 9.48, - "learning_rate": 2.640750670241287e-06, - "loss": 0.0002, - "step": 3583 - }, - { - "epoch": 9.48, - "learning_rate": 2.6273458445040215e-06, - "loss": 0.0002, - "step": 3584 - }, - { - "epoch": 9.48, - "learning_rate": 2.6139410187667563e-06, - "loss": 0.0002, - "step": 3585 - }, - { - "epoch": 9.49, - "learning_rate": 2.6005361930294905e-06, - "loss": 0.0019, - "step": 3586 - }, - { - "epoch": 9.49, - "learning_rate": 2.5871313672922252e-06, - "loss": 0.0006, - "step": 3587 - }, - { - "epoch": 9.49, - "learning_rate": 2.57372654155496e-06, - "loss": 0.0001, - "step": 3588 - }, - { - "epoch": 9.49, - "learning_rate": 2.5603217158176947e-06, - "loss": 0.0002, - "step": 3589 - }, - { - "epoch": 9.5, - "learning_rate": 2.546916890080429e-06, - "loss": 0.0001, - "step": 3590 - }, - { - "epoch": 9.5, - "learning_rate": 2.5335120643431636e-06, - "loss": 0.0044, - "step": 3591 - }, - { - "epoch": 9.5, - "learning_rate": 2.5201072386058984e-06, - "loss": 0.3189, - "step": 3592 - }, - { - "epoch": 9.51, - "learning_rate": 2.5067024128686326e-06, - "loss": 0.0001, - "step": 3593 - }, - { - "epoch": 9.51, - "learning_rate": 2.4932975871313673e-06, - "loss": 0.3545, - "step": 3594 - }, - { - "epoch": 9.51, - "learning_rate": 2.479892761394102e-06, - "loss": 0.0002, - "step": 3595 - }, - { - "epoch": 9.51, - "learning_rate": 2.4664879356568368e-06, - "loss": 0.0732, - "step": 3596 - }, - { - "epoch": 9.52, - "learning_rate": 2.453083109919571e-06, - "loss": 0.0001, - "step": 3597 - }, - { - "epoch": 9.52, - "learning_rate": 2.4396782841823058e-06, - "loss": 0.0002, - "step": 3598 - }, - { - "epoch": 9.52, - "learning_rate": 2.4262734584450405e-06, - "loss": 0.0864, - "step": 3599 - }, - { - "epoch": 9.52, - "learning_rate": 2.4128686327077747e-06, - "loss": 0.0695, - "step": 3600 - }, - { - "epoch": 9.53, - "learning_rate": 2.3994638069705094e-06, - "loss": 0.0002, - "step": 3601 - }, - { - "epoch": 9.53, - "learning_rate": 2.386058981233244e-06, - "loss": 0.1166, - "step": 3602 - }, - { - "epoch": 9.53, - "learning_rate": 2.372654155495979e-06, - "loss": 0.0008, - "step": 3603 - }, - { - "epoch": 9.53, - "learning_rate": 2.359249329758713e-06, - "loss": 0.0001, - "step": 3604 - }, - { - "epoch": 9.54, - "learning_rate": 2.345844504021448e-06, - "loss": 0.0125, - "step": 3605 - }, - { - "epoch": 9.54, - "learning_rate": 2.3324396782841826e-06, - "loss": 0.0001, - "step": 3606 - }, - { - "epoch": 9.54, - "learning_rate": 2.319034852546917e-06, - "loss": 0.0027, - "step": 3607 - }, - { - "epoch": 9.54, - "learning_rate": 2.3056300268096516e-06, - "loss": 0.1129, - "step": 3608 - }, - { - "epoch": 9.55, - "learning_rate": 2.2922252010723863e-06, - "loss": 0.0739, - "step": 3609 - }, - { - "epoch": 9.55, - "learning_rate": 2.278820375335121e-06, - "loss": 0.0164, - "step": 3610 - }, - { - "epoch": 9.55, - "learning_rate": 2.2654155495978552e-06, - "loss": 0.0068, - "step": 3611 - }, - { - "epoch": 9.56, - "learning_rate": 2.25201072386059e-06, - "loss": 0.0765, - "step": 3612 - }, - { - "epoch": 9.56, - "learning_rate": 2.2386058981233247e-06, - "loss": 0.0002, - "step": 3613 - }, - { - "epoch": 9.56, - "learning_rate": 2.225201072386059e-06, - "loss": 0.0002, - "step": 3614 - }, - { - "epoch": 9.56, - "learning_rate": 2.2117962466487937e-06, - "loss": 0.0003, - "step": 3615 - }, - { - "epoch": 9.57, - "learning_rate": 2.1983914209115284e-06, - "loss": 0.0252, - "step": 3616 - }, - { - "epoch": 9.57, - "learning_rate": 2.184986595174263e-06, - "loss": 0.0002, - "step": 3617 - }, - { - "epoch": 9.57, - "learning_rate": 2.1715817694369974e-06, - "loss": 0.0002, - "step": 3618 - }, - { - "epoch": 9.57, - "learning_rate": 2.158176943699732e-06, - "loss": 0.0003, - "step": 3619 - }, - { - "epoch": 9.58, - "learning_rate": 2.1447721179624668e-06, - "loss": 0.0101, - "step": 3620 - }, - { - "epoch": 9.58, - "learning_rate": 2.131367292225201e-06, - "loss": 0.0002, - "step": 3621 - }, - { - "epoch": 9.58, - "learning_rate": 2.1179624664879358e-06, - "loss": 0.0002, - "step": 3622 - }, - { - "epoch": 9.58, - "learning_rate": 2.1045576407506705e-06, - "loss": 0.126, - "step": 3623 - }, - { - "epoch": 9.59, - "learning_rate": 2.091152815013405e-06, - "loss": 0.0002, - "step": 3624 - }, - { - "epoch": 9.59, - "learning_rate": 2.0777479892761395e-06, - "loss": 0.0002, - "step": 3625 - }, - { - "epoch": 9.59, - "learning_rate": 2.0643431635388737e-06, - "loss": 0.1247, - "step": 3626 - }, - { - "epoch": 9.6, - "learning_rate": 2.050938337801609e-06, - "loss": 0.0002, - "step": 3627 - }, - { - "epoch": 9.6, - "learning_rate": 2.037533512064343e-06, - "loss": 0.0493, - "step": 3628 - }, - { - "epoch": 9.6, - "learning_rate": 2.024128686327078e-06, - "loss": 0.0526, - "step": 3629 - }, - { - "epoch": 9.6, - "learning_rate": 2.0107238605898126e-06, - "loss": 0.0672, - "step": 3630 - }, - { - "epoch": 9.61, - "learning_rate": 1.9973190348525473e-06, - "loss": 0.0141, - "step": 3631 - }, - { - "epoch": 9.61, - "learning_rate": 1.9839142091152816e-06, - "loss": 0.1962, - "step": 3632 - }, - { - "epoch": 9.61, - "learning_rate": 1.970509383378016e-06, - "loss": 0.0008, - "step": 3633 - }, - { - "epoch": 9.61, - "learning_rate": 1.957104557640751e-06, - "loss": 0.0002, - "step": 3634 - }, - { - "epoch": 9.62, - "learning_rate": 1.9436997319034853e-06, - "loss": 0.0003, - "step": 3635 - }, - { - "epoch": 9.62, - "learning_rate": 1.93029490616622e-06, - "loss": 0.0001, - "step": 3636 - }, - { - "epoch": 9.62, - "learning_rate": 1.9168900804289547e-06, - "loss": 0.0002, - "step": 3637 - }, - { - "epoch": 9.62, - "learning_rate": 1.9034852546916892e-06, - "loss": 0.0319, - "step": 3638 - }, - { - "epoch": 9.63, - "learning_rate": 1.8900804289544237e-06, - "loss": 0.0252, - "step": 3639 - }, - { - "epoch": 9.63, - "learning_rate": 1.8766756032171582e-06, - "loss": 0.0758, - "step": 3640 - }, - { - "epoch": 9.63, - "learning_rate": 1.8632707774798929e-06, - "loss": 0.0002, - "step": 3641 - }, - { - "epoch": 9.63, - "learning_rate": 1.8498659517426274e-06, - "loss": 0.0321, - "step": 3642 - }, - { - "epoch": 9.64, - "learning_rate": 1.836461126005362e-06, - "loss": 0.3423, - "step": 3643 - }, - { - "epoch": 9.64, - "learning_rate": 1.8230563002680966e-06, - "loss": 0.0007, - "step": 3644 - }, - { - "epoch": 9.64, - "learning_rate": 1.8096514745308313e-06, - "loss": 0.0163, - "step": 3645 - }, - { - "epoch": 9.65, - "learning_rate": 1.7962466487935658e-06, - "loss": 0.0001, - "step": 3646 - }, - { - "epoch": 9.65, - "learning_rate": 1.7828418230563003e-06, - "loss": 0.0001, - "step": 3647 - }, - { - "epoch": 9.65, - "learning_rate": 1.769436997319035e-06, - "loss": 0.0025, - "step": 3648 - }, - { - "epoch": 9.65, - "learning_rate": 1.7560321715817695e-06, - "loss": 0.0001, - "step": 3649 - }, - { - "epoch": 9.66, - "learning_rate": 1.7426273458445042e-06, - "loss": 0.1419, - "step": 3650 - }, - { - "epoch": 9.66, - "learning_rate": 1.7292225201072387e-06, - "loss": 0.0789, - "step": 3651 - }, - { - "epoch": 9.66, - "learning_rate": 1.7158176943699734e-06, - "loss": 0.0423, - "step": 3652 - }, - { - "epoch": 9.66, - "learning_rate": 1.7024128686327079e-06, - "loss": 0.0001, - "step": 3653 - }, - { - "epoch": 9.67, - "learning_rate": 1.6890080428954424e-06, - "loss": 0.0003, - "step": 3654 - }, - { - "epoch": 9.67, - "learning_rate": 1.675603217158177e-06, - "loss": 0.0002, - "step": 3655 - }, - { - "epoch": 9.67, - "learning_rate": 1.6621983914209116e-06, - "loss": 0.0002, - "step": 3656 - }, - { - "epoch": 9.67, - "learning_rate": 1.6487935656836463e-06, - "loss": 0.0001, - "step": 3657 - }, - { - "epoch": 9.68, - "learning_rate": 1.6353887399463808e-06, - "loss": 0.0002, - "step": 3658 - }, - { - "epoch": 9.68, - "learning_rate": 1.6219839142091153e-06, - "loss": 0.0002, - "step": 3659 - }, - { - "epoch": 9.68, - "learning_rate": 1.60857908847185e-06, - "loss": 0.0723, - "step": 3660 - }, - { - "epoch": 9.69, - "learning_rate": 1.5951742627345845e-06, - "loss": 0.0001, - "step": 3661 - }, - { - "epoch": 9.69, - "learning_rate": 1.5817694369973192e-06, - "loss": 0.0002, - "step": 3662 - }, - { - "epoch": 9.69, - "learning_rate": 1.5683646112600537e-06, - "loss": 0.1128, - "step": 3663 - }, - { - "epoch": 9.69, - "learning_rate": 1.5549597855227882e-06, - "loss": 0.0001, - "step": 3664 - }, - { - "epoch": 9.7, - "learning_rate": 1.5415549597855229e-06, - "loss": 0.0002, - "step": 3665 - }, - { - "epoch": 9.7, - "learning_rate": 1.5281501340482574e-06, - "loss": 0.0008, - "step": 3666 - }, - { - "epoch": 9.7, - "learning_rate": 1.514745308310992e-06, - "loss": 0.0846, - "step": 3667 - }, - { - "epoch": 9.7, - "learning_rate": 1.5013404825737268e-06, - "loss": 0.1065, - "step": 3668 - }, - { - "epoch": 9.71, - "learning_rate": 1.487935656836461e-06, - "loss": 0.0002, - "step": 3669 - }, - { - "epoch": 9.71, - "learning_rate": 1.4745308310991958e-06, - "loss": 0.0008, - "step": 3670 - }, - { - "epoch": 9.71, - "learning_rate": 1.4611260053619303e-06, - "loss": 0.0001, - "step": 3671 - }, - { - "epoch": 9.71, - "learning_rate": 1.447721179624665e-06, - "loss": 0.029, - "step": 3672 - }, - { - "epoch": 9.72, - "learning_rate": 1.4343163538873995e-06, - "loss": 0.0019, - "step": 3673 - }, - { - "epoch": 9.72, - "learning_rate": 1.4209115281501342e-06, - "loss": 0.0002, - "step": 3674 - }, - { - "epoch": 9.72, - "learning_rate": 1.4075067024128687e-06, - "loss": 0.0001, - "step": 3675 - }, - { - "epoch": 9.72, - "learning_rate": 1.3941018766756032e-06, - "loss": 0.0001, - "step": 3676 - }, - { - "epoch": 9.73, - "learning_rate": 1.3806970509383379e-06, - "loss": 0.0002, - "step": 3677 - }, - { - "epoch": 9.73, - "learning_rate": 1.3672922252010724e-06, - "loss": 0.0549, - "step": 3678 - }, - { - "epoch": 9.73, - "learning_rate": 1.353887399463807e-06, - "loss": 0.178, - "step": 3679 - }, - { - "epoch": 9.74, - "learning_rate": 1.3404825737265416e-06, - "loss": 0.0002, - "step": 3680 - }, - { - "epoch": 9.74, - "learning_rate": 1.3270777479892763e-06, - "loss": 0.0008, - "step": 3681 - }, - { - "epoch": 9.74, - "learning_rate": 1.3136729222520108e-06, - "loss": 0.0002, - "step": 3682 - }, - { - "epoch": 9.74, - "learning_rate": 1.3002680965147453e-06, - "loss": 0.0007, - "step": 3683 - }, - { - "epoch": 9.75, - "learning_rate": 1.28686327077748e-06, - "loss": 0.0158, - "step": 3684 - }, - { - "epoch": 9.75, - "learning_rate": 1.2734584450402145e-06, - "loss": 0.0002, - "step": 3685 - }, - { - "epoch": 9.75, - "learning_rate": 1.2600536193029492e-06, - "loss": 0.0045, - "step": 3686 - }, - { - "epoch": 9.75, - "learning_rate": 1.2466487935656837e-06, - "loss": 0.0001, - "step": 3687 - }, - { - "epoch": 9.76, - "learning_rate": 1.2332439678284184e-06, - "loss": 0.0004, - "step": 3688 - }, - { - "epoch": 9.76, - "learning_rate": 1.2198391420911529e-06, - "loss": 0.0462, - "step": 3689 - }, - { - "epoch": 9.76, - "learning_rate": 1.2064343163538874e-06, - "loss": 0.0002, - "step": 3690 - }, - { - "epoch": 9.76, - "learning_rate": 1.193029490616622e-06, - "loss": 0.4527, - "step": 3691 - }, - { - "epoch": 9.77, - "learning_rate": 1.1796246648793566e-06, - "loss": 0.0005, - "step": 3692 - }, - { - "epoch": 9.77, - "learning_rate": 1.1662198391420913e-06, - "loss": 0.0228, - "step": 3693 - }, - { - "epoch": 9.77, - "learning_rate": 1.1528150134048258e-06, - "loss": 0.1976, - "step": 3694 - }, - { - "epoch": 9.78, - "learning_rate": 1.1394101876675605e-06, - "loss": 0.0002, - "step": 3695 - }, - { - "epoch": 9.78, - "learning_rate": 1.126005361930295e-06, - "loss": 0.0122, - "step": 3696 - }, - { - "epoch": 9.78, - "learning_rate": 1.1126005361930295e-06, - "loss": 0.0215, - "step": 3697 - }, - { - "epoch": 9.78, - "learning_rate": 1.0991957104557642e-06, - "loss": 0.0072, - "step": 3698 - }, - { - "epoch": 9.79, - "learning_rate": 1.0857908847184987e-06, - "loss": 0.0002, - "step": 3699 - }, - { - "epoch": 9.79, - "learning_rate": 1.0723860589812334e-06, - "loss": 0.0002, - "step": 3700 - }, - { - "epoch": 9.79, - "learning_rate": 1.0589812332439679e-06, - "loss": 0.0002, - "step": 3701 - }, - { - "epoch": 9.79, - "learning_rate": 1.0455764075067026e-06, - "loss": 0.0472, - "step": 3702 - }, - { - "epoch": 9.8, - "learning_rate": 1.0321715817694369e-06, - "loss": 0.0002, - "step": 3703 - }, - { - "epoch": 9.8, - "learning_rate": 1.0187667560321716e-06, - "loss": 0.0002, - "step": 3704 - }, - { - "epoch": 9.8, - "learning_rate": 1.0053619302949063e-06, - "loss": 0.0783, - "step": 3705 - }, - { - "epoch": 9.8, - "learning_rate": 9.919571045576408e-07, - "loss": 0.3007, - "step": 3706 - }, - { - "epoch": 9.81, - "learning_rate": 9.785522788203755e-07, - "loss": 0.0002, - "step": 3707 - }, - { - "epoch": 9.81, - "learning_rate": 9.6514745308311e-07, - "loss": 0.0005, - "step": 3708 - }, - { - "epoch": 9.81, - "learning_rate": 9.517426273458446e-07, - "loss": 0.0002, - "step": 3709 - }, - { - "epoch": 9.81, - "learning_rate": 9.383378016085791e-07, - "loss": 0.0003, - "step": 3710 - }, - { - "epoch": 9.82, - "learning_rate": 9.249329758713137e-07, - "loss": 0.0001, - "step": 3711 - }, - { - "epoch": 9.82, - "learning_rate": 9.115281501340483e-07, - "loss": 0.0001, - "step": 3712 - }, - { - "epoch": 9.82, - "learning_rate": 8.981233243967829e-07, - "loss": 0.0002, - "step": 3713 - }, - { - "epoch": 9.83, - "learning_rate": 8.847184986595175e-07, - "loss": 0.0038, - "step": 3714 - }, - { - "epoch": 9.83, - "learning_rate": 8.713136729222521e-07, - "loss": 0.1268, - "step": 3715 - }, - { - "epoch": 9.83, - "learning_rate": 8.579088471849867e-07, - "loss": 0.1269, - "step": 3716 - }, - { - "epoch": 9.83, - "learning_rate": 8.445040214477212e-07, - "loss": 0.0004, - "step": 3717 - }, - { - "epoch": 9.84, - "learning_rate": 8.310991957104558e-07, - "loss": 0.0008, - "step": 3718 - }, - { - "epoch": 9.84, - "learning_rate": 8.176943699731904e-07, - "loss": 0.2391, - "step": 3719 - }, - { - "epoch": 9.84, - "learning_rate": 8.04289544235925e-07, - "loss": 0.1564, - "step": 3720 - }, - { - "epoch": 9.84, - "learning_rate": 7.908847184986596e-07, - "loss": 0.0001, - "step": 3721 - }, - { - "epoch": 9.85, - "learning_rate": 7.774798927613941e-07, - "loss": 0.0004, - "step": 3722 - }, - { - "epoch": 9.85, - "learning_rate": 7.640750670241287e-07, - "loss": 0.0002, - "step": 3723 - }, - { - "epoch": 9.85, - "learning_rate": 7.506702412868634e-07, - "loss": 0.0833, - "step": 3724 - }, - { - "epoch": 9.85, - "learning_rate": 7.372654155495979e-07, - "loss": 0.0002, - "step": 3725 - }, - { - "epoch": 9.86, - "learning_rate": 7.238605898123325e-07, - "loss": 0.0002, - "step": 3726 - }, - { - "epoch": 9.86, - "learning_rate": 7.104557640750671e-07, - "loss": 0.0015, - "step": 3727 - }, - { - "epoch": 9.86, - "learning_rate": 6.970509383378016e-07, - "loss": 0.0594, - "step": 3728 - }, - { - "epoch": 9.87, - "learning_rate": 6.836461126005362e-07, - "loss": 0.003, - "step": 3729 - }, - { - "epoch": 9.87, - "learning_rate": 6.702412868632708e-07, - "loss": 0.0236, - "step": 3730 - }, - { - "epoch": 9.87, - "learning_rate": 6.568364611260054e-07, - "loss": 0.0001, - "step": 3731 - }, - { - "epoch": 9.87, - "learning_rate": 6.4343163538874e-07, - "loss": 0.0209, - "step": 3732 - }, - { - "epoch": 9.88, - "learning_rate": 6.300268096514746e-07, - "loss": 0.0419, - "step": 3733 - }, - { - "epoch": 9.88, - "learning_rate": 6.166219839142092e-07, - "loss": 0.1611, - "step": 3734 - }, - { - "epoch": 9.88, - "learning_rate": 6.032171581769437e-07, - "loss": 0.0331, - "step": 3735 - }, - { - "epoch": 9.88, - "learning_rate": 5.898123324396783e-07, - "loss": 0.0015, - "step": 3736 - }, - { - "epoch": 9.89, - "learning_rate": 5.764075067024129e-07, - "loss": 0.0001, - "step": 3737 - }, - { - "epoch": 9.89, - "learning_rate": 5.630026809651475e-07, - "loss": 0.0002, - "step": 3738 - }, - { - "epoch": 9.89, - "learning_rate": 5.495978552278821e-07, - "loss": 0.0001, - "step": 3739 - }, - { - "epoch": 9.89, - "learning_rate": 5.361930294906167e-07, - "loss": 0.0695, - "step": 3740 - }, - { - "epoch": 9.9, - "learning_rate": 5.227882037533513e-07, - "loss": 0.0256, - "step": 3741 - }, - { - "epoch": 9.9, - "learning_rate": 5.093833780160858e-07, - "loss": 0.0002, - "step": 3742 - }, - { - "epoch": 9.9, - "learning_rate": 4.959785522788204e-07, - "loss": 0.0002, - "step": 3743 - }, - { - "epoch": 9.9, - "learning_rate": 4.82573726541555e-07, - "loss": 0.0001, - "step": 3744 - }, - { - "epoch": 9.91, - "learning_rate": 4.6916890080428954e-07, - "loss": 0.0002, - "step": 3745 - }, - { - "epoch": 9.91, - "learning_rate": 4.5576407506702414e-07, - "loss": 0.0002, - "step": 3746 - }, - { - "epoch": 9.91, - "learning_rate": 4.4235924932975874e-07, - "loss": 0.0055, - "step": 3747 - }, - { - "epoch": 9.92, - "learning_rate": 4.2895442359249334e-07, - "loss": 0.0002, - "step": 3748 - }, - { - "epoch": 9.92, - "learning_rate": 4.155495978552279e-07, - "loss": 0.0002, - "step": 3749 - }, - { - "epoch": 9.92, - "learning_rate": 4.021447721179625e-07, - "loss": 0.0108, - "step": 3750 - }, - { - "epoch": 9.92, - "learning_rate": 3.8873994638069704e-07, - "loss": 0.0002, - "step": 3751 - }, - { - "epoch": 9.93, - "learning_rate": 3.753351206434317e-07, - "loss": 0.0001, - "step": 3752 - }, - { - "epoch": 9.93, - "learning_rate": 3.6193029490616624e-07, - "loss": 0.0734, - "step": 3753 - }, - { - "epoch": 9.93, - "learning_rate": 3.485254691689008e-07, - "loss": 0.0001, - "step": 3754 - }, - { - "epoch": 9.93, - "learning_rate": 3.351206434316354e-07, - "loss": 0.0004, - "step": 3755 - }, - { - "epoch": 9.94, - "learning_rate": 3.2171581769437e-07, - "loss": 0.4773, - "step": 3756 - }, - { - "epoch": 9.94, - "learning_rate": 3.083109919571046e-07, - "loss": 0.0002, - "step": 3757 - }, - { - "epoch": 9.94, - "learning_rate": 2.9490616621983914e-07, - "loss": 0.0056, - "step": 3758 - }, - { - "epoch": 9.94, - "learning_rate": 2.8150134048257374e-07, - "loss": 0.0196, - "step": 3759 - }, - { - "epoch": 9.95, - "learning_rate": 2.6809651474530835e-07, - "loss": 0.0002, - "step": 3760 - }, - { - "epoch": 9.95, - "learning_rate": 2.546916890080429e-07, - "loss": 0.07, - "step": 3761 - }, - { - "epoch": 9.95, - "learning_rate": 2.412868632707775e-07, - "loss": 0.0003, - "step": 3762 - }, - { - "epoch": 9.96, - "learning_rate": 2.2788203753351207e-07, - "loss": 0.0217, - "step": 3763 - }, - { - "epoch": 9.96, - "learning_rate": 2.1447721179624667e-07, - "loss": 0.0001, - "step": 3764 - }, - { - "epoch": 9.96, - "learning_rate": 2.0107238605898125e-07, - "loss": 0.0004, - "step": 3765 - }, - { - "epoch": 9.96, - "learning_rate": 1.8766756032171585e-07, - "loss": 0.0001, - "step": 3766 - }, - { - "epoch": 9.97, - "learning_rate": 1.742627345844504e-07, - "loss": 0.0005, - "step": 3767 - }, - { - "epoch": 9.97, - "learning_rate": 1.60857908847185e-07, - "loss": 0.0255, - "step": 3768 - }, - { - "epoch": 9.97, - "learning_rate": 1.4745308310991957e-07, - "loss": 0.1316, - "step": 3769 - }, - { - "epoch": 9.97, - "learning_rate": 1.3404825737265417e-07, - "loss": 0.0002, - "step": 3770 - }, - { - "epoch": 9.98, - "learning_rate": 1.2064343163538875e-07, - "loss": 0.0057, - "step": 3771 - }, - { - "epoch": 9.98, - "learning_rate": 1.0723860589812334e-07, - "loss": 0.1064, - "step": 3772 - }, - { - "epoch": 9.98, - "learning_rate": 9.383378016085792e-08, - "loss": 0.0002, - "step": 3773 - }, - { - "epoch": 9.98, - "learning_rate": 8.04289544235925e-08, - "loss": 0.0678, - "step": 3774 - }, - { - "epoch": 9.99, - "learning_rate": 6.702412868632709e-08, - "loss": 0.0914, - "step": 3775 - }, - { - "epoch": 9.99, - "learning_rate": 5.361930294906167e-08, - "loss": 0.0001, - "step": 3776 - }, - { - "epoch": 9.99, - "learning_rate": 4.021447721179625e-08, - "loss": 0.448, - "step": 3777 - }, - { - "epoch": 9.99, - "learning_rate": 2.6809651474530834e-08, - "loss": 0.0002, - "step": 3778 - }, - { - "epoch": 10.0, - "learning_rate": 1.3404825737265417e-08, - "loss": 0.0027, - "step": 3779 - }, - { - "epoch": 10.0, - "learning_rate": 0.0, - "loss": 0.0051, - "step": 3780 - }, - { - "epoch": 10.0, - "eval_f1": 0.7801857585139318, - "eval_loss": 1.4196751117706299, - "eval_runtime": 1.9725, - "eval_samples_per_second": 767.032, - "eval_steps_per_second": 48.161, - "step": 3780 - } - ], - "max_steps": 3780, - "num_train_epochs": 10, - "total_flos": 968262171263616.0, - "trial_name": null, - "trial_params": null -}