{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.995635229929852,
  "eval_steps": 500,
  "global_step": 3204,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0, "learning_rate": 4.123711340206186e-07, "loss": 3.2515, "step": 2 },
    { "epoch": 0.0, "learning_rate": 8.247422680412372e-07, "loss": 3.2363, "step": 4 },
    { "epoch": 0.01, "learning_rate": 1.2371134020618557e-06, "loss": 3.2793, "step": 6 },
    { "epoch": 0.01, "learning_rate": 1.6494845360824744e-06, "loss": 3.2382, "step": 8 },
    { "epoch": 0.01, "learning_rate": 2.061855670103093e-06, "loss": 3.273, "step": 10 },
    { "epoch": 0.01, "learning_rate": 2.4742268041237115e-06, "loss": 3.2146, "step": 12 },
    { "epoch": 0.02, "learning_rate": 2.8865979381443297e-06, "loss": 3.1379, "step": 14 },
    { "epoch": 0.02, "learning_rate": 3.298969072164949e-06, "loss": 3.1523, "step": 16 },
    { "epoch": 0.02, "learning_rate": 3.7113402061855674e-06, "loss": 3.0647, "step": 18 },
    { "epoch": 0.02, "learning_rate": 4.123711340206186e-06, "loss": 3.067, "step": 20 },
    { "epoch": 0.03, "learning_rate": 4.536082474226804e-06, "loss": 3.1078, "step": 22 },
    { "epoch": 0.03, "learning_rate": 4.948453608247423e-06, "loss": 2.9914, "step": 24 },
    { "epoch": 0.03, "learning_rate": 5.360824742268042e-06, "loss": 2.9851, "step": 26 },
    { "epoch": 0.03, "learning_rate": 5.7731958762886594e-06, "loss": 2.911, "step": 28 },
    { "epoch": 0.04, "learning_rate": 6.185567010309279e-06, "loss": 3.0351, "step": 30 },
    { "epoch": 0.04, "learning_rate": 6.597938144329898e-06, "loss": 3.0463, "step": 32 },
    { "epoch": 0.04, "learning_rate": 7.010309278350515e-06, "loss": 2.9003, "step": 34 },
    { "epoch": 0.04, "learning_rate": 7.422680412371135e-06, "loss": 2.9228, "step": 36 },
    { "epoch": 0.05, "learning_rate": 7.835051546391754e-06, "loss": 2.9414, "step": 38 },
    { "epoch": 0.05, "learning_rate": 8.247422680412371e-06, "loss": 2.8951, "step": 40 },
    { "epoch": 0.05, "learning_rate": 8.65979381443299e-06, "loss": 2.9161, "step": 42 },
    { "epoch": 0.05, "learning_rate": 9.072164948453609e-06, "loss": 2.9445, "step": 44 },
    { "epoch": 0.06, "learning_rate": 9.484536082474226e-06, "loss": 2.8693, "step": 46 },
    { "epoch": 0.06, "learning_rate": 9.896907216494846e-06, "loss": 2.8698, "step": 48 },
    { "epoch": 0.06, "learning_rate": 1.0309278350515464e-05, "loss": 2.8505, "step": 50 },
    { "epoch": 0.06, "learning_rate": 1.0721649484536083e-05, "loss": 2.8884, "step": 52 },
    { "epoch": 0.07, "learning_rate": 1.1134020618556703e-05, "loss": 2.8652, "step": 54 },
    { "epoch": 0.07, "learning_rate": 1.1546391752577319e-05, "loss": 2.8146, "step": 56 },
    { "epoch": 0.07, "learning_rate": 1.1958762886597938e-05, "loss": 2.805, "step": 58 },
    { "epoch": 0.07, "learning_rate": 1.2371134020618558e-05, "loss": 2.855, "step": 60 },
    { "epoch": 0.08, "learning_rate": 1.2783505154639176e-05, "loss": 2.79, "step": 62 },
    { "epoch": 0.08, "learning_rate": 1.3195876288659795e-05, "loss": 2.8766, "step": 64 },
    { "epoch": 0.08, "learning_rate": 1.3608247422680415e-05, "loss": 2.7995, "step": 66 },
    { "epoch": 0.08, "learning_rate": 1.402061855670103e-05, "loss": 2.7542, "step": 68 },
    { "epoch": 0.09, "learning_rate": 1.443298969072165e-05, "loss": 2.7528, "step": 70 },
    { "epoch": 0.09, "learning_rate": 1.484536082474227e-05, "loss": 2.7848, "step": 72 },
    { "epoch": 0.09, "learning_rate": 1.5257731958762888e-05, "loss": 2.7701, "step": 74 },
    { "epoch": 0.09, "learning_rate": 1.5670103092783507e-05, "loss": 2.7374, "step": 76 },
    { "epoch": 0.1, "learning_rate": 1.6082474226804127e-05, "loss": 2.6963, "step": 78 },
    { "epoch": 0.1, "learning_rate": 1.6494845360824743e-05, "loss": 2.6799, "step": 80 },
    { "epoch": 0.1, "learning_rate": 1.6907216494845362e-05, "loss": 2.7273, "step": 82 },
    { "epoch": 0.1, "learning_rate": 1.731958762886598e-05, "loss": 2.6891, "step": 84 },
    { "epoch": 0.11, "learning_rate": 1.7731958762886598e-05, "loss": 2.6861, "step": 86 },
    { "epoch": 0.11, "learning_rate": 1.8144329896907217e-05, "loss": 2.7364, "step": 88 },
    { "epoch": 0.11, "learning_rate": 1.8556701030927837e-05, "loss": 2.5863, "step": 90 },
    { "epoch": 0.11, "learning_rate": 1.8969072164948453e-05, "loss": 2.6845, "step": 92 },
    { "epoch": 0.12, "learning_rate": 1.9381443298969072e-05, "loss": 2.7043, "step": 94 },
    { "epoch": 0.12, "learning_rate": 1.9793814432989692e-05, "loss": 2.631, "step": 96 },
    { "epoch": 0.12, "learning_rate": 1.9999994888042835e-05, "loss": 2.6999, "step": 98 },
    { "epoch": 0.12, "learning_rate": 1.999995399241686e-05, "loss": 2.688, "step": 100 },
    { "epoch": 0.13, "learning_rate": 1.999987220133215e-05, "loss": 2.5822, "step": 102 },
    { "epoch": 0.13, "learning_rate": 1.99997495151232e-05, "loss": 2.6453, "step": 104 },
    { "epoch": 0.13, "learning_rate": 1.999958593429174e-05, "loss": 2.59, "step": 106 },
    { "epoch": 0.13, "learning_rate": 1.999938145950675e-05, "loss": 2.5822, "step": 108 },
    { "epoch": 0.14, "learning_rate": 1.9999136091604433e-05, "loss": 2.5116, "step": 110 },
    { "epoch": 0.14, "learning_rate": 1.999884983158825e-05, "loss": 2.5744, "step": 112 },
    { "epoch": 0.14, "learning_rate": 1.9998522680628868e-05, "loss": 2.5379, "step": 114 },
    { "epoch": 0.14, "learning_rate": 1.9998154640064196e-05, "loss": 2.5487, "step": 116 },
    { "epoch": 0.15, "learning_rate": 1.9997745711399364e-05, "loss": 2.6037, "step": 118 },
    { "epoch": 0.15, "learning_rate": 1.9997295896306706e-05, "loss": 2.6105, "step": 120 },
    { "epoch": 0.15, "learning_rate": 1.999680519662577e-05, "loss": 2.5882, "step": 122 },
    { "epoch": 0.15, "learning_rate": 1.999627361436331e-05, "loss": 2.5904, "step": 124 },
    { "epoch": 0.16, "learning_rate": 1.9995701151693265e-05, "loss": 2.5657, "step": 126 },
    { "epoch": 0.16, "learning_rate": 1.999508781095675e-05, "loss": 2.603, "step": 128 },
    { "epoch": 0.16, "learning_rate": 1.999443359466207e-05, "loss": 2.543, "step": 130 },
    { "epoch": 0.16, "learning_rate": 1.9993738505484684e-05, "loss": 2.6112, "step": 132 },
    { "epoch": 0.17, "learning_rate": 1.9993002546267203e-05, "loss": 2.5608, "step": 134 },
    { "epoch": 0.17, "learning_rate": 1.9992225720019377e-05, "loss": 2.528, "step": 136 },
    { "epoch": 0.17, "learning_rate": 1.9991408029918086e-05, "loss": 2.5347, "step": 138 },
    { "epoch": 0.17, "learning_rate": 1.9990549479307334e-05, "loss": 2.4668, "step": 140 },
    { "epoch": 0.18, "learning_rate": 1.9989650071698214e-05, "loss": 2.5576, "step": 142 },
    { "epoch": 0.18, "learning_rate": 1.9988709810768914e-05, "loss": 2.4433, "step": 144 },
    { "epoch": 0.18, "learning_rate": 1.998772870036469e-05, "loss": 2.5095, "step": 146 },
    { "epoch": 0.18, "learning_rate": 1.9986706744497857e-05, "loss": 2.5265, "step": 148 },
    { "epoch": 0.19, "learning_rate": 1.998564394734777e-05, "loss": 2.3981, "step": 150 },
    { "epoch": 0.19, "learning_rate": 1.9984540313260808e-05, "loss": 2.4508, "step": 152 },
    { "epoch": 0.19, "learning_rate": 1.998339584675035e-05, "loss": 2.4684, "step": 154 },
    { "epoch": 0.19, "learning_rate": 1.998221055249677e-05, "loss": 2.4673, "step": 156 },
    { "epoch": 0.2, "learning_rate": 1.99809844353474e-05, "loss": 2.46, "step": 158 },
    { "epoch": 0.2, "learning_rate": 1.9979717500316532e-05, "loss": 2.4691, "step": 160 },
    { "epoch": 0.2, "learning_rate": 1.997840975258538e-05, "loss": 2.4876, "step": 162 },
    { "epoch": 0.2, "learning_rate": 1.9977061197502055e-05, "loss": 2.3855, "step": 164 },
    { "epoch": 0.21, "learning_rate": 1.997567184058156e-05, "loss": 2.4862, "step": 166 },
    { "epoch": 0.21, "learning_rate": 1.9974241687505772e-05, "loss": 2.3955, "step": 168 },
    { "epoch": 0.21, "learning_rate": 1.997277074412338e-05, "loss": 2.4416, "step": 170 },
    { "epoch": 0.21, "learning_rate": 1.9971259016449913e-05, "loss": 2.4385, "step": 172 },
    { "epoch": 0.22, "learning_rate": 1.9969706510667676e-05, "loss": 2.4228, "step": 174 },
    { "epoch": 0.22, "learning_rate": 1.996811323312574e-05, "loss": 2.4402, "step": 176 },
    { "epoch": 0.22, "learning_rate": 1.9966479190339913e-05, "loss": 2.3951, "step": 178 },
    { "epoch": 0.22, "learning_rate": 1.9964804388992725e-05, "loss": 2.4134, "step": 180 },
    { "epoch": 0.23, "learning_rate": 1.9963088835933386e-05, "loss": 2.3631, "step": 182 },
    { "epoch": 0.23, "learning_rate": 1.9961332538177754e-05, "loss": 2.42, "step": 184 },
    { "epoch": 0.23, "learning_rate": 1.9959535502908326e-05, "loss": 2.4121, "step": 186 },
    { "epoch": 0.23, "learning_rate": 1.9957697737474198e-05, "loss": 2.3994, "step": 188 },
    { "epoch": 0.24, "learning_rate": 1.995581924939102e-05, "loss": 2.391, "step": 190 },
    { "epoch": 0.24, "learning_rate": 1.9953900046341005e-05, "loss": 2.4389, "step": 192 },
    { "epoch": 0.24, "learning_rate": 1.9951940136172846e-05, "loss": 2.3884, "step": 194 },
    { "epoch": 0.24, "learning_rate": 1.9949939526901724e-05, "loss": 2.4058, "step": 196 },
    { "epoch": 0.25, "learning_rate": 1.9947898226709267e-05, "loss": 2.3603, "step": 198 },
    { "epoch": 0.25, "learning_rate": 1.9945816243943495e-05, "loss": 2.3278, "step": 200 },
    { "epoch": 0.25, "learning_rate": 1.9943693587118818e-05, "loss": 2.4248, "step": 202 },
    { "epoch": 0.25, "learning_rate": 1.994153026491598e-05, "loss": 2.3969, "step": 204 },
    { "epoch": 0.26, "learning_rate": 1.9939326286182016e-05, "loss": 2.327, "step": 206 },
    { "epoch": 0.26, "learning_rate": 1.9937081659930255e-05, "loss": 2.4033, "step": 208 },
    { "epoch": 0.26, "learning_rate": 1.9934796395340228e-05, "loss": 2.392, "step": 210 },
    { "epoch": 0.26, "learning_rate": 1.993247050175768e-05, "loss": 2.391, "step": 212 },
    { "epoch": 0.27, "learning_rate": 1.99301039886945e-05, "loss": 2.3343, "step": 214 },
    { "epoch": 0.27, "learning_rate": 1.9927696865828698e-05, "loss": 2.3227, "step": 216 },
    { "epoch": 0.27, "learning_rate": 1.9925249143004353e-05, "loss": 2.3913, "step": 218 },
    { "epoch": 0.27, "learning_rate": 1.9922760830231597e-05, "loss": 2.3677, "step": 220 },
    { "epoch": 0.28, "learning_rate": 1.9920231937686538e-05, "loss": 2.3598, "step": 222 },
    { "epoch": 0.28, "learning_rate": 1.9917662475711247e-05, "loss": 2.3573, "step": 224 },
    { "epoch": 0.28, "learning_rate": 1.9915052454813705e-05, "loss": 2.4229, "step": 226 },
    { "epoch": 0.28, "learning_rate": 1.9912401885667765e-05, "loss": 2.2836, "step": 228 },
    { "epoch": 0.29, "learning_rate": 1.9909710779113093e-05, "loss": 2.2907, "step": 230 },
    { "epoch": 0.29, "learning_rate": 1.990697914615515e-05, "loss": 2.3445, "step": 232 },
    { "epoch": 0.29, "learning_rate": 1.9904206997965123e-05, "loss": 2.3142, "step": 234 },
    { "epoch": 0.29, "learning_rate": 1.9901394345879893e-05, "loss": 2.2854, "step": 236 },
    { "epoch": 0.3, "learning_rate": 1.989854120140198e-05, "loss": 2.3004, "step": 238 },
    { "epoch": 0.3, "learning_rate": 1.9895647576199507e-05, "loss": 2.272, "step": 240 },
    { "epoch": 0.3, "learning_rate": 1.9892713482106135e-05, "loss": 2.329, "step": 242 },
    { "epoch": 0.3, "learning_rate": 1.988973893112104e-05, "loss": 2.3237, "step": 244 },
    { "epoch": 0.31, "learning_rate": 1.9886723935408835e-05, "loss": 2.3882, "step": 246 },
    { "epoch": 0.31, "learning_rate": 1.9883668507299538e-05, "loss": 2.3263, "step": 248 },
    { "epoch": 0.31, "learning_rate": 1.988057265928853e-05, "loss": 2.3502, "step": 250 },
    { "epoch": 0.31, "learning_rate": 1.9877436404036466e-05, "loss": 2.2923, "step": 252 },
    { "epoch": 0.32, "learning_rate": 1.987425975436928e-05, "loss": 2.3222, "step": 254 },
    { "epoch": 0.32, "learning_rate": 1.9871042723278078e-05, "loss": 2.319, "step": 256 },
    { "epoch": 0.32, "learning_rate": 1.9867785323919116e-05, "loss": 2.2974, "step": 258 },
    { "epoch": 0.32, "learning_rate": 1.9864487569613747e-05, "loss": 2.331, "step": 260 },
    { "epoch": 0.33, "learning_rate": 1.9861149473848343e-05, "loss": 2.2826, "step": 262 },
    { "epoch": 0.33, "learning_rate": 1.9857771050274267e-05, "loss": 2.3282, "step": 264 },
    { "epoch": 0.33, "learning_rate": 1.98543523127078e-05, "loss": 2.2752, "step": 266 },
    { "epoch": 0.33, "learning_rate": 1.9850893275130085e-05, "loss": 2.2815, "step": 268 },
    { "epoch": 0.34, "learning_rate": 1.9847393951687087e-05, "loss": 2.1985, "step": 270 },
    { "epoch": 0.34, "learning_rate": 1.984385435668951e-05, "loss": 2.2914, "step": 272 },
    { "epoch": 0.34, "learning_rate": 1.9840274504612763e-05, "loss": 2.244, "step": 274 },
    { "epoch": 0.34, "learning_rate": 1.983665441009688e-05, "loss": 2.2515, "step": 276 },
    { "epoch": 0.35, "learning_rate": 1.983299408794647e-05, "loss": 2.2607, "step": 278 },
    { "epoch": 0.35, "learning_rate": 1.9829293553130656e-05, "loss": 2.226, "step": 280 },
    { "epoch": 0.35, "learning_rate": 1.9825552820783018e-05, "loss": 2.2799, "step": 282 },
    { "epoch": 0.35, "learning_rate": 1.982177190620152e-05, "loss": 2.2573, "step": 284 },
    { "epoch": 0.36, "learning_rate": 1.981795082484846e-05, "loss": 2.276, "step": 286 },
    { "epoch": 0.36, "learning_rate": 1.9814089592350395e-05, "loss": 2.2828, "step": 288 },
    { "epoch": 0.36, "learning_rate": 1.9810188224498086e-05, "loss": 2.2221, "step": 290 },
    { "epoch": 0.36, "learning_rate": 1.980624673724643e-05, "loss": 2.3025, "step": 292 },
    { "epoch": 0.37, "learning_rate": 1.9802265146714393e-05, "loss": 2.1947, "step": 294 },
    { "epoch": 0.37, "learning_rate": 1.9798243469184947e-05, "loss": 2.3123, "step": 296 },
    { "epoch": 0.37, "learning_rate": 1.9794181721105002e-05, "loss": 2.3382, "step": 298 },
    { "epoch": 0.37, "learning_rate": 1.979007991908534e-05, "loss": 2.2656, "step": 300 },
    { "epoch": 0.38, "learning_rate": 1.9785938079900547e-05, "loss": 2.2834, "step": 302 },
    { "epoch": 0.38, "learning_rate": 1.9781756220488938e-05, "loss": 2.249, "step": 304 },
    { "epoch": 0.38, "learning_rate": 1.9777534357952503e-05, "loss": 2.2662, "step": 306 },
    { "epoch": 0.38, "learning_rate": 1.977327250955682e-05, "loss": 2.1953, "step": 308 },
    { "epoch": 0.39, "learning_rate": 1.976897069273099e-05, "loss": 2.2089, "step": 310 },
    { "epoch": 0.39, "learning_rate": 1.9764628925067576e-05, "loss": 2.2576, "step": 312 },
    { "epoch": 0.39, "learning_rate": 1.976024722432252e-05, "loss": 2.1983, "step": 314 },
    { "epoch": 0.39, "learning_rate": 1.9755825608415065e-05, "loss": 2.2231, "step": 316 },
    { "epoch": 0.4, "learning_rate": 1.9751364095427694e-05, "loss": 2.2898, "step": 318 },
    { "epoch": 0.4, "learning_rate": 1.974686270360606e-05, "loss": 2.2599, "step": 320 },
    { "epoch": 0.4, "learning_rate": 1.9742321451358887e-05, "loss": 2.2066, "step": 322 },
    { "epoch": 0.4, "learning_rate": 1.973774035725793e-05, "loss": 2.2213, "step": 324 },
    { "epoch": 0.41, "learning_rate": 1.9733119440037863e-05, "loss": 2.2995, "step": 326 },
    { "epoch": 0.41, "learning_rate": 1.9728458718596228e-05, "loss": 2.2125, "step": 328 },
    { "epoch": 0.41, "learning_rate": 1.9723758211993344e-05, "loss": 2.2308, "step": 330 },
    { "epoch": 0.41, "learning_rate": 1.971901793945224e-05, "loss": 2.2471, "step": 332 },
    { "epoch": 0.42, "learning_rate": 1.9714237920358566e-05, "loss": 2.2093, "step": 334 },
    { "epoch": 0.42, "learning_rate": 1.9709418174260523e-05, "loss": 2.2243, "step": 336 },
    { "epoch": 0.42, "learning_rate": 1.9704558720868768e-05, "loss": 2.2659, "step": 338 },
    { "epoch": 0.42, "learning_rate": 1.9699659580056353e-05, "loss": 2.2232, "step": 340 },
    { "epoch": 0.43, "learning_rate": 1.9694720771858632e-05, "loss": 2.2668, "step": 342 },
    { "epoch": 0.43, "learning_rate": 1.968974231647318e-05, "loss": 2.2309, "step": 344 },
    { "epoch": 0.43, "learning_rate": 1.9684724234259715e-05, "loss": 2.1846, "step": 346 },
    { "epoch": 0.43, "learning_rate": 1.9679666545740002e-05, "loss": 2.3046, "step": 348 },
    { "epoch": 0.44, "learning_rate": 1.9674569271597792e-05, "loss": 2.233, "step": 350 },
    { "epoch": 0.44, "learning_rate": 1.9669432432678713e-05, "loss": 2.2801, "step": 352 },
    { "epoch": 0.44, "learning_rate": 1.96642560499902e-05, "loss": 2.1939, "step": 354 },
    { "epoch": 0.44, "learning_rate": 1.9659040144701412e-05, "loss": 2.2114, "step": 356 },
    { "epoch": 0.45, "learning_rate": 1.9653784738143122e-05, "loss": 2.1578, "step": 358 },
    { "epoch": 0.45, "learning_rate": 1.9648489851807662e-05, "loss": 2.2855, "step": 360 },
    { "epoch": 0.45, "learning_rate": 1.9643155507348807e-05, "loss": 2.1823, "step": 362 },
    { "epoch": 0.45, "learning_rate": 1.9637781726581706e-05, "loss": 2.1782, "step": 364 },
    { "epoch": 0.46, "learning_rate": 1.963236853148278e-05, "loss": 2.1873, "step": 366 },
    { "epoch": 0.46, "learning_rate": 1.9626915944189647e-05, "loss": 2.1374, "step": 368 },
    { "epoch": 0.46, "learning_rate": 1.9621423987001013e-05, "loss": 2.2583, "step": 370 },
    { "epoch": 0.46, "learning_rate": 1.961589268237659e-05, "loss": 2.1622, "step": 372 },
    { "epoch": 0.47, "learning_rate": 1.961032205293701e-05, "loss": 2.2058, "step": 374 },
    { "epoch": 0.47, "learning_rate": 1.9604712121463713e-05, "loss": 2.1792, "step": 376 },
    { "epoch": 0.47, "learning_rate": 1.959906291089889e-05, "loss": 2.2287, "step": 378 },
    { "epoch": 0.47, "learning_rate": 1.959337444434534e-05, "loss": 2.1758, "step": 380 },
    { "epoch": 0.48, "learning_rate": 1.9587646745066424e-05, "loss": 2.2457, "step": 382 },
    { "epoch": 0.48, "learning_rate": 1.9581879836485936e-05, "loss": 2.2054, "step": 384 },
    { "epoch": 0.48, "learning_rate": 1.9576073742188022e-05, "loss": 2.2095, "step": 386 },
    { "epoch": 0.48, "learning_rate": 1.957022848591708e-05, "loss": 2.167, "step": 388 },
    { "epoch": 0.49, "learning_rate": 1.9564344091577664e-05, "loss": 2.1353, "step": 390 },
    { "epoch": 0.49, "learning_rate": 1.9558420583234382e-05, "loss": 2.2185, "step": 392 },
    { "epoch": 0.49, "learning_rate": 1.955245798511181e-05, "loss": 2.1465, "step": 394 },
    { "epoch": 0.49, "learning_rate": 1.9546456321594374e-05, "loss": 2.1234, "step": 396 },
    { "epoch": 0.5, "learning_rate": 1.954041561722627e-05, "loss": 2.2211, "step": 398 },
    { "epoch": 0.5, "learning_rate": 1.9534335896711344e-05, "loss": 2.1423, "step": 400 },
    { "epoch": 0.5, "learning_rate": 1.952821718491301e-05, "loss": 2.1656, "step": 402 },
    { "epoch": 0.5, "learning_rate": 1.9522059506854133e-05, "loss": 2.1568, "step": 404 },
    { "epoch": 0.51, "learning_rate": 1.9515862887716943e-05, "loss": 2.1858, "step": 406 },
    { "epoch": 0.51, "learning_rate": 1.95096273528429e-05, "loss": 2.1204, "step": 408 },
    { "epoch": 0.51, "learning_rate": 1.9503352927732645e-05, "loss": 2.1687, "step": 410 },
    { "epoch": 0.51, "learning_rate": 1.9497039638045833e-05, "loss": 2.1762, "step": 412 },
    { "epoch": 0.52, "learning_rate": 1.9490687509601073e-05, "loss": 2.1915, "step": 414 },
    { "epoch": 0.52, "learning_rate": 1.948429656837581e-05, "loss": 2.1705, "step": 416 },
    { "epoch": 0.52, "learning_rate": 1.9477866840506205e-05, "loss": 2.1434, "step": 418 },
    { "epoch": 0.52, "learning_rate": 1.947139835228705e-05, "loss": 2.1287, "step": 420 },
    { "epoch": 0.53, "learning_rate": 1.9464891130171647e-05, "loss": 2.0862, "step": 422 },
    { "epoch": 0.53, "learning_rate": 1.9458345200771697e-05, "loss": 2.2316, "step": 424 },
    { "epoch": 0.53, "learning_rate": 1.9451760590857207e-05, "loss": 2.1802, "step": 426 },
    { "epoch": 0.53, "learning_rate": 1.944513732735636e-05, "loss": 2.1693, "step": 428 },
    { "epoch": 0.54, "learning_rate": 1.943847543735543e-05, "loss": 2.1848, "step": 430 },
    { "epoch": 0.54, "learning_rate": 1.9431774948098633e-05, "loss": 2.2342, "step": 432 },
    { "epoch": 0.54, "learning_rate": 1.942503588698806e-05, "loss": 2.1495, "step": 434 },
    { "epoch": 0.54, "learning_rate": 1.9418258281583545e-05, "loss": 2.1224, "step": 436 },
    { "epoch": 0.55, "learning_rate": 1.9411442159602532e-05, "loss": 2.1866, "step": 438 },
    { "epoch": 0.55, "learning_rate": 1.940458754892e-05, "loss": 2.1382, "step": 440 },
    { "epoch": 0.55, "learning_rate": 1.939769447756832e-05, "loss": 2.1603, "step": 442 },
    { "epoch": 0.55, "learning_rate": 1.939076297373715e-05, "loss": 2.1242, "step": 444 },
    { "epoch": 0.56, "learning_rate": 1.9383793065773335e-05, "loss": 2.1245, "step": 446 },
    { "epoch": 0.56, "learning_rate": 1.9376784782180747e-05, "loss": 2.1903, "step": 448 },
    { "epoch": 0.56, "learning_rate": 1.9369738151620228e-05, "loss": 2.118, "step": 450 },
    { "epoch": 0.56, "learning_rate": 1.936265320290943e-05, "loss": 2.1582, "step": 452 },
    { "epoch": 0.57, "learning_rate": 1.9355529965022703e-05, "loss": 2.1664, "step": 454 },
    { "epoch": 0.57, "learning_rate": 1.9348368467090988e-05, "loss": 2.1747, "step": 456 },
    { "epoch": 0.57, "learning_rate": 1.9341168738401696e-05, "loss": 2.1592, "step": 458 },
    { "epoch": 0.57, "learning_rate": 1.933393080839859e-05, "loss": 2.1831, "step": 460 },
    { "epoch": 0.58, "learning_rate": 1.932665470668164e-05, "loss": 2.133, "step": 462 },
    { "epoch": 0.58, "learning_rate": 1.931934046300695e-05, "loss": 2.1431, "step": 464 },
    { "epoch": 0.58, "learning_rate": 1.9311988107286584e-05, "loss": 2.131, "step": 466 },
    { "epoch": 0.58, "learning_rate": 1.9304597669588472e-05, "loss": 2.1011, "step": 468 },
    { "epoch": 0.59, "learning_rate": 1.92971691801363e-05, "loss": 2.1485, "step": 470 },
    { "epoch": 0.59, "learning_rate": 1.928970266930934e-05, "loss": 2.1388, "step": 472 },
    { "epoch": 0.59, "learning_rate": 1.928219816764238e-05, "loss": 2.1475, "step": 474 },
    { "epoch": 0.59, "learning_rate": 1.9274655705825566e-05, "loss": 2.1372, "step": 476 },
    { "epoch": 0.6, "learning_rate": 1.9267075314704282e-05, "loss": 2.1498, "step": 478 },
    { "epoch": 0.6, "learning_rate": 1.925945702527903e-05, "loss": 2.1308, "step": 480 },
    { "epoch": 0.6, "learning_rate": 1.9251800868705292e-05, "loss": 2.122, "step": 482 },
    { "epoch": 0.6, "learning_rate": 1.924410687629342e-05, "loss": 2.0804, "step": 484 },
    { "epoch": 0.61, "learning_rate": 1.9236375079508492e-05, "loss": 2.0633, "step": 486 },
    { "epoch": 0.61, "learning_rate": 1.922860550997019e-05, "loss": 2.1071, "step": 488 },
    { "epoch": 0.61, "learning_rate": 1.9220798199452676e-05, "loss": 2.0868, "step": 490 },
    { "epoch": 0.61, "learning_rate": 1.9212953179884443e-05, "loss": 2.1227, "step": 492 },
    { "epoch": 0.62, "learning_rate": 1.9205070483348216e-05, "loss": 2.0897, "step": 494 },
    { "epoch": 0.62, "learning_rate": 1.9197150142080784e-05, "loss": 2.1388, "step": 496 },
    { "epoch": 0.62, "learning_rate": 1.9189192188472902e-05, "loss": 2.1197, "step": 498 },
    { "epoch": 0.62, "learning_rate": 1.9181196655069126e-05, "loss": 2.1265, "step": 500 },
    { "epoch": 0.63, "learning_rate": 1.917316357456772e-05, "loss": 2.1214, "step": 502 },
    { "epoch": 0.63, "learning_rate": 1.9165092979820478e-05, "loss": 2.1106, "step": 504 },
    { "epoch": 0.63, "learning_rate": 1.915698490383262e-05, "loss": 2.0805, "step": 506 },
    { "epoch": 0.63, "learning_rate": 1.914883937976265e-05, "loss": 2.0937, "step": 508 },
    { "epoch": 0.64, "learning_rate": 1.9140656440922216e-05, "loss": 2.071, "step": 510 },
    { "epoch": 0.64, "learning_rate": 1.9132436120775967e-05, "loss": 2.109, "step": 512 },
    { "epoch": 0.64, "learning_rate": 1.9124178452941445e-05, "loss": 2.1233, "step": 514 },
    { "epoch": 0.64, "learning_rate": 1.9115883471188916e-05, "loss": 2.0982, "step": 516 },
    { "epoch": 0.65, "learning_rate": 1.9107551209441238e-05, "loss": 2.1426, "step": 518 },
    { "epoch": 0.65, "learning_rate": 1.9099181701773735e-05, "loss": 2.1051, "step": 520 },
    { "epoch": 0.65, "learning_rate": 1.9090774982414056e-05, "loss": 2.1288, "step": 522 },
    { "epoch": 0.65, "learning_rate": 1.9082331085742018e-05, "loss": 2.089, "step": 524 },
    { "epoch": 0.66, "learning_rate": 1.9073850046289484e-05, "loss": 2.061, "step": 526 },
    { "epoch": 0.66, "learning_rate": 1.9065331898740216e-05, "loss": 2.1479, "step": 528 },
    { "epoch": 0.66, "learning_rate": 1.9056776677929726e-05, "loss": 2.0863, "step": 530 },
    { "epoch": 0.66, "learning_rate": 1.9048184418845146e-05, "loss": 2.1404, "step": 532 },
    { "epoch": 0.67, "learning_rate": 1.9039555156625072e-05, "loss": 2.0718, "step": 534 },
    { "epoch": 0.67, "learning_rate": 1.9030888926559436e-05, "loss": 2.1136, "step": 536 },
    { "epoch": 0.67, "learning_rate": 1.902218576408934e-05, "loss": 2.0681, "step": 538 },
    { "epoch": 0.67, "learning_rate": 1.9013445704806933e-05, "loss": 2.123, "step": 540 },
    { "epoch": 0.68, "learning_rate": 1.900466878445525e-05, "loss": 2.0872, "step": 542 },
    { "epoch": 0.68, "learning_rate": 1.8995855038928078e-05, "loss": 2.1093, "step": 544 },
    { "epoch": 0.68, "learning_rate": 1.89870045042698e-05, "loss": 2.052, "step": 546 },
    { "epoch": 0.68, "learning_rate": 1.8978117216675246e-05, "loss": 2.1257, "step": 548 },
    { "epoch": 0.69, "learning_rate": 1.8969193212489557e-05, "loss": 2.0837, "step": 550 },
    { "epoch": 0.69, "learning_rate": 1.896023252820802e-05, "loss": 2.097, "step": 552 },
    { "epoch": 0.69, "learning_rate": 1.8951235200475942e-05, "loss": 2.0979, "step": 554 },
    { "epoch": 0.69, "learning_rate": 1.894220126608847e-05, "loss": 2.1158, "step": 556 },
    { "epoch": 0.7, "learning_rate": 1.8933130761990465e-05, "loss": 2.1114, "step": 558 },
    { "epoch": 0.7, "learning_rate": 1.8924023725276345e-05, "loss": 2.119, "step": 560 },
    { "epoch": 0.7, "learning_rate": 1.8914880193189912e-05, "loss": 2.0998, "step": 562 },
    { "epoch": 0.7, "learning_rate": 1.8905700203124248e-05, "loss": 2.0889, "step": 564 },
    { "epoch": 0.71, "learning_rate": 1.8896483792621504e-05, "loss": 2.1034, "step": 566 },
    { "epoch": 0.71, "learning_rate": 1.8887230999372795e-05, "loss": 2.0658, "step": 568 },
    { "epoch": 0.71, "learning_rate": 1.8877941861218018e-05, "loss": 2.1129, "step": 570 },
    { "epoch": 0.71, "learning_rate": 1.8868616416145696e-05, "loss": 2.1012, "step": 572 },
    { "epoch": 0.72, "learning_rate": 1.8859254702292847e-05, "loss": 2.0656, "step": 574 },
    { "epoch": 0.72, "learning_rate": 1.8849856757944804e-05, "loss": 2.1631, "step": 576 },
    { "epoch": 0.72, "learning_rate": 1.8840422621535067e-05, "loss": 2.0432, "step": 578 },
    { "epoch": 0.72, "learning_rate": 1.8830952331645144e-05, "loss": 2.0827, "step": 580 },
    { "epoch": 0.73, "learning_rate": 1.8821445927004406e-05, "loss": 2.0975, "step": 582 },
    { "epoch": 0.73, "learning_rate": 1.8811903446489905e-05, "loss": 2.0677, "step": 584 },
    { "epoch": 0.73, "learning_rate": 1.8802324929126232e-05, "loss": 2.0849, "step": 586 },
    { "epoch": 0.73, "learning_rate": 1.8792710414085356e-05, "loss": 2.1223, "step": 588 },
    { "epoch": 0.74, "learning_rate": 1.8783059940686454e-05, "loss": 2.0426, "step": 590 },
    { "epoch": 0.74, "learning_rate": 1.8773373548395762e-05, "loss": 2.0512, "step": 592 },
    { "epoch": 0.74, "learning_rate": 1.8763651276826417e-05, "loss": 2.0999, "step": 594 },
    { "epoch": 0.74, "learning_rate": 1.8753893165738267e-05, "loss": 2.0729, "step": 596 },
    { "epoch": 0.75, "learning_rate": 1.8744099255037737e-05, "loss": 2.078, "step": 598 },
    { "epoch": 0.75, "learning_rate": 1.873426958477767e-05, "loss": 2.0587, "step": 600 },
    { "epoch": 0.75, "learning_rate": 1.8724404195157127e-05, "loss": 2.0845, "step": 602 },
    { "epoch": 0.75, "learning_rate": 1.871450312652126e-05, "loss": 2.0577, "step": 604 },
    { "epoch": 0.76, "learning_rate": 1.8704566419361137e-05, "loss": 2.0777, "step": 606 },
    { "epoch": 0.76, "learning_rate": 1.8694594114313553e-05, "loss": 2.085, "step": 608 },
    { "epoch": 0.76, "learning_rate": 1.8684586252160904e-05, "loss": 2.1124, "step": 610 },
    { "epoch": 0.76, "learning_rate": 1.8674542873830986e-05, "loss": 2.0271, "step": 612 },
    { "epoch": 0.77, "learning_rate": 1.8664464020396844e-05, "loss": 2.0369, "step": 614 },
    { "epoch": 0.77, "learning_rate": 1.86543497330766e-05, "loss": 2.0503, "step": 616 },
    { "epoch": 0.77, "learning_rate": 1.864420005323329e-05, "loss": 2.0603, "step": 618 },
    { "epoch": 0.77, "learning_rate": 1.8634015022374683e-05, "loss": 2.0858, "step": 620 },
    { "epoch": 0.78, "learning_rate": 1.8623794682153122e-05, "loss": 2.0202, "step": 622 },
    { "epoch": 0.78, "learning_rate": 1.8613539074365353e-05, "loss": 2.0622, "step": 624 },
    { "epoch": 0.78, "learning_rate": 1.8603248240952342e-05, "loss": 1.9967, "step": 626 },
    { "epoch": 0.78, "learning_rate": 1.859292222399912e-05, "loss": 2.0614, "step": 628 },
    { "epoch": 0.79, "learning_rate": 1.8582561065734602e-05, "loss": 2.0637, "step": 630 },
    { "epoch": 0.79, "learning_rate": 1.8572164808531417e-05, "loss": 2.0702, "step": 632 },
    { "epoch": 0.79, "learning_rate": 1.8561733494905728e-05, "loss": 2.1353, "step": 634 },
    { "epoch": 0.79, "learning_rate": 1.855126716751707e-05, "loss": 2.0729, "step": 636 },
    { "epoch": 0.8, "learning_rate": 1.854076586916816e-05, "loss": 2.0428, "step": 638 },
    { "epoch": 0.8, "learning_rate": 1.8530229642804742e-05, "loss": 1.9822, "step": 640 },
    { "epoch": 0.8, "learning_rate": 1.8519658531515397e-05, "loss": 2.0314, "step": 642 },
    { "epoch": 0.8, "learning_rate": 1.850905257853136e-05, "loss": 2.0784, "step": 644 },
    { "epoch": 0.81, "learning_rate": 1.849841182722637e-05, "loss": 2.0722, "step": 646 },
    { "epoch": 0.81, "learning_rate": 1.8487736321116466e-05, "loss": 2.0681, "step": 648 },
    { "epoch": 0.81, "learning_rate": 1.8477026103859823e-05, "loss": 2.0748, "step": 650 },
    { "epoch": 0.81, "learning_rate": 1.846628121925656e-05, "loss": 2.0481, "step": 652 },
    { "epoch": 0.82, "learning_rate": 1.845550171124858e-05, "loss": 2.0585, "step": 654 },
    { "epoch": 0.82, "learning_rate": 1.8444687623919388e-05, "loss": 2.0695, "step": 656 },
    { "epoch": 0.82, "learning_rate": 1.843383900149388e-05, "loss": 2.068, "step": 658 },
    { "epoch": 0.82, "learning_rate": 1.8422955888338207e-05, "loss": 2.0855, "step": 660 },
    { "epoch": 0.83, "learning_rate": 1.841203832895956e-05, "loss": 1.9948, "step": 662 },
    { "epoch": 0.83, "learning_rate": 1.840108636800601e-05, "loss": 2.0412, "step": 664 },
    { "epoch": 0.83, "learning_rate": 1.8390100050266305e-05, "loss": 2.0012, "step": 666 },
    { "epoch": 0.83, "learning_rate": 1.8379079420669702e-05, "loss": 2.0683, "step": 668 },
    { "epoch": 0.84, "learning_rate": 1.8368024524285784e-05, "loss": 2.0818, "step": 670 },
    { "epoch": 0.84, "learning_rate": 1.835693540632426e-05, "loss": 2.0211, "step": 672 },
    { "epoch": 0.84, "learning_rate": 1.8345812112134795e-05, "loss": 2.0427, "step": 674 },
    { "epoch": 0.84, "learning_rate": 1.833465468720682e-05, "loss": 2.0726, "step": 676 },
    { "epoch": 0.85, "learning_rate": 1.832346317716935e-05, "loss": 2.015, "step": 678 },
    { "epoch": 0.85, "learning_rate": 1.8312237627790783e-05, "loss": 2.0292, "step": 680 },
    { "epoch": 0.85, "learning_rate": 1.8300978084978736e-05, "loss": 2.0428, "step": 682 },
    { "epoch": 0.85, "learning_rate": 1.8289684594779835e-05, "loss": 1.9837, "step": 684 },
    { "epoch": 0.86, "learning_rate": 1.8278357203379536e-05, "loss": 2.0786, "step": 686 },
    { "epoch": 0.86, "learning_rate": 1.8266995957101944e-05, "loss": 2.0286, "step": 688 },
    { "epoch": 0.86, "learning_rate": 1.825560090240961e-05, "loss": 2.0611, "step": 690 },
    { "epoch": 0.86, "learning_rate": 1.824417208590334e-05, "loss": 2.0859, "step": 692 },
    { "epoch": 0.87, "learning_rate": 1.8232709554322027e-05, "loss": 2.0911, "step": 694 },
    { "epoch": 0.87, "learning_rate": 1.822121335454243e-05, "loss": 2.0973, "step": 696 },
    { "epoch": 0.87, "learning_rate": 1.8209683533579006e-05, "loss": 2.0489, "step": 698 },
    { "epoch": 0.87, "learning_rate": 1.81981201385837e-05, "loss": 2.0586, "step": 700 },
    { "epoch": 0.88, "learning_rate": 1.8186523216845763e-05, "loss": 2.0523, "step": 702 },
    { "epoch": 0.88, "learning_rate": 1.8174892815791563e-05, "loss": 2.07, "step": 704 },
    { "epoch": 0.88, "learning_rate": 1.816322898298437e-05, "loss": 2.0633, "step": 706 },
    { "epoch": 0.88, "learning_rate": 1.8151531766124186e-05, "loss": 2.0214, "step": 708 },
    { "epoch": 0.89, "learning_rate": 1.8139801213047538e-05, "loss": 2.0275, "step": 710 },
    { "epoch": 0.89, "learning_rate": 1.812803737172728e-05, "loss": 2.0443, "step": 712 },
    { "epoch": 0.89, "learning_rate": 1.81162402902724e-05, "loss": 2.0277, "step": 714 },
    { "epoch": 0.89, "learning_rate": 1.8104410016927828e-05, "loss": 2.0317, "step": 716 },
    { "epoch": 0.9, "learning_rate": 1.8092546600074236e-05, "loss": 2.0425, "step": 718 },
    { "epoch": 0.9, "learning_rate": 1.8080650088227824e-05, "loss": 2.0706, "step": 720 },
    { "epoch": 0.9, "learning_rate": 1.8068720530040157e-05, "loss": 2.0114, "step": 722 },
    { "epoch": 0.9, "learning_rate": 1.805675797429793e-05, "loss": 1.9992, "step": 724 },
    { "epoch": 0.91, "learning_rate": 1.804476246992279e-05, "loss": 2.0038, "step": 726 },
    { "epoch": 0.91, "learning_rate": 1.8032734065971125e-05, "loss": 2.0611, "step": 728 },
    { "epoch": 0.91, "learning_rate": 1.8020672811633874e-05, "loss": 2.0218, "step": 730 },
    { "epoch": 0.91, "learning_rate": 1.800857875623632e-05, "loss": 2.0177, "step": 732 },
    { "epoch": 0.92, "learning_rate": 1.799645194923788e-05, "loss": 2.0428, "step": 734 },
    { "epoch": 0.92, "learning_rate": 1.7984292440231915e-05, "loss": 2.0229, "step": 736 },
    { "epoch": 0.92, "learning_rate": 1.7972100278945527e-05, "loss": 2.0203, "step": 738 },
    { "epoch": 0.92, "learning_rate": 1.795987551523935e-05, "loss": 1.9437, "step": 740 },
    { "epoch": 0.93, "learning_rate": 1.794761819910734e-05, "loss": 1.9968, "step": 742 },
    { "epoch": 0.93, "learning_rate": 1.7935328380676587e-05, "loss": 1.9886, "step": 744 },
    { "epoch": 0.93, "learning_rate": 1.79230061102071e-05, "loss": 2.0842, "step": 746 },
    { "epoch": 0.93, "learning_rate": 1.79106514380916e-05, "loss": 2.0212, "step": 748 },
    { "epoch": 0.94, "learning_rate": 1.7898264414855314e-05, "loss": 2.0567, "step": 750 },
    { "epoch": 0.94, "learning_rate": 1.7885845091155786e-05, "loss": 2.0346, "step": 752 },
    { "epoch": 0.94, "learning_rate": 1.787339351778263e-05, "loss": 1.9744, "step": 754 },
    { "epoch": 0.94, "learning_rate": 1.786090974565737e-05, "loss": 2.0173, "step": 756 },
    { "epoch": 0.95, "learning_rate": 1.784839382583319e-05, "loss": 2.0722, "step": 758 },
    { "epoch": 0.95, "learning_rate": 1.783584580949477e-05, "loss": 1.9932, "step": 760 },
    { "epoch": 0.95, "learning_rate": 1.782326574795802e-05, "loss": 2.0283, "step": 762 },
    { "epoch": 0.95, "learning_rate": 1.781065369266992e-05, "loss": 2.0102, "step": 764 },
    { "epoch": 0.96, "learning_rate": 1.7798009695208288e-05, "loss": 1.9523, "step": 766 },
    { "epoch": 0.96, "learning_rate": 1.7785333807281567e-05, "loss": 2.0264, "step": 768 },
    { "epoch": 0.96, "learning_rate": 1.7772626080728624e-05, "loss": 1.9718, "step": 770 },
    { "epoch": 0.96, "learning_rate": 1.775988656751852e-05, "loss": 2.0733, "step": 772 },
    { "epoch": 0.97, "learning_rate": 1.774711531975033e-05, "loss": 2.0576, "step": 774 },
    { "epoch": 0.97, "learning_rate": 1.7734312389652893e-05, "loss": 2.0249, "step": 776 },
    { "epoch": 0.97, "learning_rate": 1.7721477829584617e-05, "loss": 1.9813, "step": 778 },
    { "epoch": 0.97, "learning_rate": 1.7708611692033265e-05, "loss": 1.9984, "step": 780 },
    { "epoch": 0.98, "learning_rate": 1.769571402961575e-05, "loss": 2.0084, "step": 782 },
    { "epoch": 0.98, "learning_rate": 1.768278489507788e-05, "loss": 1.9982, "step": 784 },
    { "epoch": 0.98, "learning_rate": 1.7669824341294203e-05, "loss": 1.9854, "step": 786 },
    { "epoch": 0.98, "learning_rate": 1.765683242126773e-05, "loss": 2.0392, "step": 788 },
    { "epoch": 0.99, "learning_rate": 1.7643809188129765e-05, "loss": 2.0015, "step": 790 },
    { "epoch": 0.99, "learning_rate": 1.763075469513966e-05, "loss": 2.0244, "step": 792 },
    { "epoch": 0.99, "learning_rate": 1.761766899568461e-05, "loss": 2.0581, "step": 794 },
    { "epoch": 0.99, "learning_rate": 1.7604552143279424e-05, "loss": 1.9923, "step": 796 },
    { "epoch": 1.0, "learning_rate": 1.759140419156633e-05, "loss": 1.911, "step": 798 },
    { "epoch": 1.0, "learning_rate": 1.7578225194314717e-05, "loss": 2.0443, "step": 800 },
    { "epoch": 1.0, "learning_rate": 1.7565015205420946e-05, "loss": 1.9745, "step": 802 },
    { "epoch": 1.0, "learning_rate": 1.7551774278908128e-05, "loss": 1.9708, "step": 804 },
    { "epoch": 1.01, "learning_rate": 1.7538502468925887e-05, "loss": 2.009, "step": 806 },
    { "epoch": 1.01, "learning_rate": 1.7525199829750145e-05, "loss": 1.9965, "step": 808 },
    { "epoch": 1.01, "learning_rate": 1.7511866415782908e-05, "loss": 2.0283, "step": 810 },
    { "epoch": 1.01, "learning_rate": 1.749850228155203e-05, "loss": 2.0294, "step": 812 },
    { "epoch": 1.02, "learning_rate": 1.7485107481711014e-05, "loss": 2.0089, "step": 814 },
    { "epoch": 1.02, "learning_rate": 1.747168207103875e-05, "loss": 2.0058, "step": 816 },
    { "epoch": 1.02, "learning_rate": 1.7458226104439324e-05, "loss": 2.0208, "step": 818 },
    { "epoch": 1.02, "learning_rate": 1.7444739636941786e-05, "loss": 1.945, "step": 820 },
    { "epoch": 1.03, "learning_rate": 1.7431222723699916e-05, "loss": 2.0033, "step": 822 },
    { "epoch": 1.03, "learning_rate": 1.7417675419992003e-05, "loss": 1.9301, "step": 824 },
    { "epoch": 1.03, "learning_rate": 1.7404097781220625e-05, "loss": 1.9678, "step": 826 },
    { "epoch": 1.03, "learning_rate": 1.739048986291241e-05, "loss": 2.0263, "step": 828 },
    { "epoch": 1.04, "learning_rate": 1.7376851720717826e-05, "loss": 1.9615, "step": 830 },
    { "epoch": 1.04, "learning_rate": 1.7363183410410933e-05, "loss": 2.0027, "step": 832 },
    { "epoch": 1.04, "learning_rate": 1.734948498788917e-05, "loss": 1.9357, "step": 834 },
    { "epoch": 1.04, "learning_rate": 1.7335756509173128e-05, "loss": 1.9003, "step": 836 },
    { "epoch": 1.05, "learning_rate": 1.7321998030406303e-05, "loss": 1.9607, "step": 838 },
    { "epoch": 1.05, "learning_rate": 1.730820960785488e-05, "loss": 1.9667, "step": 840 },
    { "epoch": 1.05, "learning_rate": 1.729439129790752e-05, "loss": 1.9807, "step": 842 },
    { "epoch": 1.05, "learning_rate": 1.728054315707508e-05, "loss": 1.8775, "step": 844 },
    { "epoch": 1.06, "learning_rate": 1.726666524199043e-05, "loss": 1.9992, "step": 846 },
    { "epoch": 1.06, "learning_rate": 1.7252757609408216e-05, "loss": 2.0187, "step": 848 },
    { "epoch": 1.06, "learning_rate": 1.7238820316204582e-05, "loss": 1.9714, "step": 850 },
    { "epoch": 1.06, "learning_rate": 1.7224853419377e-05, "loss": 1.9563, "step": 852 },
    { "epoch": 1.07, "learning_rate": 1.7210856976043995e-05, "loss": 1.9302, "step": 854 },
    { "epoch": 1.07, "learning_rate": 1.719683104344493e-05, "loss": 1.9393, "step": 856 },
    { "epoch": 1.07, "learning_rate": 1.718277567893976e-05, "loss": 1.9751, "step": 858 },
    { "epoch": 1.07, "learning_rate": 1.7168690940008813e-05, "loss": 1.9714, "step": 860 },
    { "epoch": 1.07, "learning_rate": 1.7154576884252535e-05, "loss": 1.9784, "step": 862 },
    { "epoch": 1.08, "learning_rate": 1.7140433569391275e-05, "loss": 1.9815, "step": 864 },
    { "epoch": 1.08, "learning_rate": 1.7126261053265025e-05, "loss": 1.9715, "step": 866 },
    { "epoch": 1.08, "learning_rate": 1.7112059393833217e-05, "loss": 2.0431, "step": 868 },
    { "epoch": 1.08, "learning_rate": 1.709782864917445e-05, "loss": 1.9463, "step": 870 },
    { "epoch": 1.09, "learning_rate": 1.7083568877486278e-05, "loss": 1.964, "step": 872 },
    { "epoch": 1.09, "learning_rate": 1.7069280137084955e-05, "loss": 1.9445, "step": 874 },
    { "epoch": 1.09, "learning_rate": 1.7054962486405212e-05, "loss": 1.9913, "step": 876 },
    { "epoch": 1.09, "learning_rate": 1.704061598400001e-05, "loss": 1.923, "step": 878 },
    { "epoch": 1.1, "learning_rate": 1.7026240688540295e-05, "loss": 1.9334, "step": 880 },
    { "epoch": 1.1, "learning_rate": 1.7011836658814766e-05, "loss": 1.944, "step": 882 },
    { "epoch": 1.1, "learning_rate": 1.699740395372964e-05, "loss": 1.9318, "step": 884 },
    { "epoch": 1.1, "learning_rate": 1.6982942632308396e-05, "loss": 1.9421, "step": 886 },
    { "epoch": 1.11, "learning_rate": 1.6968452753691543e-05, "loss": 1.9397, "step": 888 },
    { "epoch": 1.11, "learning_rate": 1.6953934377136375e-05, "loss": 1.9486, "step": 890 },
    { "epoch": 1.11, "learning_rate": 1.6939387562016735e-05, "loss": 2.0136, "step": 892 },
    { "epoch": 1.11, "learning_rate": 1.6924812367822764e-05, "loss": 1.9234, "step": 894 },
    { "epoch": 1.12, "learning_rate": 1.691020885416066e-05, "loss": 1.901, "step": 896 },
    { "epoch": 1.12, "learning_rate": 1.689557708075244e-05, "loss": 1.9517, "step": 898 },
    { "epoch": 1.12, "learning_rate": 1.688091710743568e-05, "loss": 1.9282, "step": 900 },
    { "epoch": 1.12, "learning_rate": 1.68662289941633e-05, "loss": 2.0253, "step": 902 },
    { "epoch": 1.13, "learning_rate": 1.6851512801003282e-05, "loss": 1.9939, "step": 904 },
    { "epoch": 1.13, "learning_rate": 1.6836768588138452e-05, "loss": 1.9548, "step": 906 },
    { "epoch": 1.13, "learning_rate": 1.6821996415866223e-05, "loss": 1.9861, "step": 908 },
    { "epoch": 1.13, "learning_rate": 1.6807196344598346e-05, "loss": 1.9772, "step": 910 },
    { "epoch": 1.14, "learning_rate": 1.6792368434860672e-05, "loss": 1.958, "step": 912 },
    { "epoch": 1.14, "learning_rate": 1.67775127472929e-05, "loss": 1.9588, "step": 914 },
    { "epoch": 1.14, "learning_rate": 1.676262934264832e-05, "loss": 1.9887, "step": 916 },
    { "epoch": 1.14, "learning_rate": 1.6747718281793582e-05, "loss": 1.9106, "step": 918 },
    { "epoch": 1.15, "learning_rate": 1.673277962570843e-05, "loss": 1.914, "step": 920 },
    { "epoch": 1.15, "learning_rate": 1.6717813435485473e-05, "loss": 1.9406, "step": 922 },
    { "epoch": 1.15, "learning_rate": 1.6702819772329904e-05, "loss": 1.9185, "step": 924 },
    { "epoch": 1.15, "learning_rate": 1.668779869755928e-05, "loss": 1.9934, "step": 926 },
    { "epoch": 1.16, "learning_rate": 1.6672750272603267e-05, "loss": 1.9559, "step": 928 },
    { "epoch": 1.16, "learning_rate": 1.665767455900336e-05, "loss": 2.0021, "step": 930 },
    { "epoch": 1.16, "learning_rate": 1.6642571618412673e-05, "loss": 1.9273, "step": 932 },
    { "epoch": 1.16, "learning_rate": 1.6627441512595654e-05, "loss": 1.9716, "step": 934 },
    { "epoch": 1.17, "learning_rate": 1.6612284303427852e-05, "loss": 1.9463, "step": 936 },
    { "epoch": 1.17, "learning_rate": 1.6597100052895653e-05, "loss": 1.8938, "step": 938 },
    { "epoch": 1.17, "learning_rate": 1.658188882309604e-05, "loss": 1.9758, "step": 940 },
    { "epoch": 1.17, "learning_rate": 1.6566650676236307e-05, "loss": 1.9275, "step": 942 },
    { "epoch": 1.18, "learning_rate": 1.655138567463385e-05, "loss": 1.9751, "step": 944 },
    { "epoch": 1.18, "learning_rate": 1.6536093880715876e-05, "loss": 1.9754, "step": 946 },
    { "epoch": 1.18, "learning_rate": 1.6520775357019174e-05, "loss": 1.968, "step": 948 },
    { "epoch": 1.18, "learning_rate": 1.6505430166189828e-05, "loss": 1.9585, "step": 950 },
    { "epoch": 1.19, "learning_rate": 1.6490058370982994e-05, "loss": 1.9287, "step": 952 },
    { "epoch": 1.19, "learning_rate": 1.6474660034262622e-05, "loss": 1.9728, "step": 954 },
    { "epoch": 1.19, "learning_rate": 1.6459235219001204e-05, "loss": 1.978, "step": 956 },
    { "epoch": 1.19, "learning_rate": 1.6443783988279523e-05, "loss": 1.9451, "step": 958 },
    { "epoch": 1.2, "learning_rate": 1.6428306405286383e-05, "loss": 1.9673, "step": 960 },
    { "epoch": 1.2, "learning_rate": 1.6412802533318363e-05, "loss": 1.9358, "step": 962 },
    { "epoch": 1.2, "learning_rate": 1.639727243577955e-05, "loss": 1.9624, "step": 964 },
    { "epoch": 1.2, "learning_rate": 1.6381716176181288e-05, "loss": 1.9, "step": 966 },
    { "epoch": 1.21, "learning_rate": 1.6366133818141893e-05, "loss": 1.9617, "step": 968 },
    { "epoch": 1.21, "learning_rate": 1.6350525425386438e-05, "loss": 1.9358, "step": 970 },
    { "epoch": 1.21, "learning_rate": 1.6334891061746453e-05, "loss": 1.9565, "step": 972 },
    { "epoch": 1.21, "learning_rate": 1.6319230791159676e-05, "loss": 1.9245, "step": 974 },
    { "epoch": 1.22, "learning_rate": 1.63035446776698e-05, "loss": 1.939, "step": 976 },
    { "epoch": 1.22, "learning_rate": 1.6287832785426196e-05, "loss": 1.9429, "step": 978 },
    { "epoch": 1.22, "learning_rate": 1.627209517868367e-05, "loss": 1.9298, "step": 980 },
    { "epoch": 1.22, "learning_rate": 1.625633192180218e-05, "loss": 1.9545, "step": 982 },
    { "epoch": 1.23, "learning_rate": 1.6240543079246586e-05, "loss": 1.9513, "step": 984 },
    { "epoch": 1.23, "learning_rate": 1.6224728715586374e-05, "loss": 1.9926, "step": 986 },
    { "epoch": 1.23, "learning_rate": 1.620888889549542e-05, "loss": 1.9601, "step": 988 },
    { "epoch": 1.23, "learning_rate": 1.6193023683751682e-05, "loss": 1.9468, "step": 990 },
    { "epoch": 1.24, "learning_rate": 1.617713314523697e-05, "loss": 1.9289, "step": 992 },
    { "epoch": 1.24, "learning_rate": 1.616121734493668e-05, "loss": 1.9203, "step": 994 },
    { "epoch": 1.24, "learning_rate": 1.6145276347939495e-05, "loss": 1.9468, "step": 996 },
    { "epoch": 1.24, "learning_rate": 1.612931021943716e-05, "loss": 1.9861, "step": 998 },
    { "epoch": 1.25, "learning_rate": 1.6113319024724186e-05, "loss": 1.9802, "step": 1000 },
    { "epoch": 1.25, "learning_rate": 1.60973028291976e-05, "loss": 1.9265, "step": 1002 },
    { "epoch": 1.25, "learning_rate": 1.6081261698356674e-05, "loss": 1.9757, "step": 1004 },
    { "epoch": 1.25, "learning_rate": 1.6065195697802645e-05, "loss": 1.9665, "step": 1006 },
    { "epoch": 1.26, "learning_rate": 1.604910489323846e-05, "loss": 1.9503, "step": 1008 },
    { "epoch": 1.26, "learning_rate": 1.603298935046851e-05, "loss": 1.9481, "step": 1010 },
    { "epoch": 1.26, "learning_rate": 1.601684913539835e-05, "loss": 1.9511, "step": 1012 },
    { "epoch": 1.26, "learning_rate": 1.6000684314034426e-05, "loss": 1.9628, "step": 1014 },
    { "epoch": 1.27, "learning_rate": 1.598449495248383e-05, "loss": 1.8868, "step": 1016 },
    { "epoch": 1.27, "learning_rate": 1.5968281116954e-05, "loss": 1.9337, "step": 1018 },
    { "epoch": 1.27, "learning_rate": 1.5952042873752463e-05, "loss": 1.9516, "step": 1020 },
    { "epoch": 1.27, "learning_rate": 1.5935780289286566e-05, "loss": 1.967, "step": 1022 },
    { "epoch": 1.28, "learning_rate": 1.59194934300632e-05, "loss": 1.9356, "step": 1024 },
    { "epoch": 1.28, "learning_rate": 1.590318236268853e-05, "loss": 1.9748, "step": 1026 },
    { "epoch": 1.28, "learning_rate": 1.5886847153867723e-05, "loss": 1.9188, "step": 1028 },
    { "epoch": 1.28, "learning_rate": 1.587048787040467e-05, "loss": 1.9968, "step": 1030 },
    { "epoch": 1.29, "learning_rate": 1.585410457920172e-05, "loss": 1.9988, "step": 1032 },
    { "epoch": 1.29, "learning_rate": 1.5837697347259403e-05, "loss": 1.8772, "step": 1034 },
    { "epoch": 1.29, "learning_rate": 1.582126624167615e-05, "loss": 1.9178, "step": 1036 },
    { "epoch": 1.29, "learning_rate": 1.5804811329648037e-05, "loss": 1.9256, "step": 1038 },
    { "epoch": 1.3, "learning_rate": 1.5788332678468488e-05, "loss": 1.8819, "step": 1040 },
    { "epoch": 1.3, "learning_rate": 1.5780084471752673e-05, "loss": 1.9045, "step": 1042 },
    { "epoch": 1.3, "learning_rate": 1.576357033823344e-05, "loss": 1.9067, "step": 1044 },
    { "epoch": 1.3, "learning_rate": 1.5747032634220474e-05, "loss": 1.9169, "step": 1046 },
    { "epoch": 1.31, "learning_rate": 1.5730471427345783e-05, "loss": 1.9107, "step": 1048 },
    { "epoch": 1.31, "learning_rate": 1.5713886785337497e-05, "loss": 1.9408, "step": 1050 },
    { "epoch": 1.31, "learning_rate": 1.5697278776019578e-05, "loss": 1.8719, "step": 1052 },
    { "epoch": 1.31, "learning_rate": 1.568064746731156e-05, "loss": 1.8867, "step": 1054 },
    { "epoch": 1.32, "learning_rate": 1.5663992927228254e-05, "loss": 1.8921, "step": 1056 },
    { "epoch": 1.32, "learning_rate": 1.5647315223879474e-05, "loss": 1.8821, "step": 1058 },
    { "epoch": 1.32, "learning_rate": 1.5630614425469776e-05, "loss": 2.0148, "step": 1060 },
    { "epoch": 1.32, "learning_rate": 1.5613890600298147e-05, "loss": 1.9553, "step": 1062 },
    { "epoch": 1.33, "learning_rate": 1.5597143816757758e-05, "loss": 1.8958, "step": 1064 },
    { "epoch": 1.33, "learning_rate": 1.558037414333566e-05, "loss": 1.9025, "step": 1066 },
    { "epoch": 1.33, "learning_rate": 1.5563581648612517e-05, "loss": 1.9068, "step": 1068 },
    { "epoch": 1.33, "learning_rate": 1.5546766401262328e-05, "loss": 1.8626, "step": 1070 },
    { "epoch": 1.34, "learning_rate": 1.5529928470052123e-05, "loss": 1.9218, "step": 1072 },
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.5513067923841724e-05, |
|
"loss": 1.892, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.549618483158342e-05, |
|
"loss": 1.9211, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.5479279262321708e-05, |
|
"loss": 1.9199, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.5462351285193004e-05, |
|
"loss": 1.911, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.5445400969425372e-05, |
|
"loss": 1.9338, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.5428428384338224e-05, |
|
"loss": 1.9059, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.5411433599342038e-05, |
|
"loss": 1.9341, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.5394416683938095e-05, |
|
"loss": 1.9271, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.537737770771817e-05, |
|
"loss": 1.9101, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.5360316740364248e-05, |
|
"loss": 1.8771, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.5343233851648273e-05, |
|
"loss": 1.924, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.5326129111431814e-05, |
|
"loss": 1.9237, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.530900258966582e-05, |
|
"loss": 1.9499, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.5291854356390304e-05, |
|
"loss": 1.8753, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.5274684481734076e-05, |
|
"loss": 1.8888, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.525749303591445e-05, |
|
"loss": 1.9085, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.5240280089236955e-05, |
|
"loss": 1.9355, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.5223045712095052e-05, |
|
"loss": 1.9223, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.5205789974969836e-05, |
|
"loss": 1.9465, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.5188512948429765e-05, |
|
"loss": 1.9348, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.5171214703130359e-05, |
|
"loss": 1.9093, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.5153895309813903e-05, |
|
"loss": 1.8418, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.5136554839309188e-05, |
|
"loss": 1.8449, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.5119193362531177e-05, |
|
"loss": 1.9036, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.510181095048076e-05, |
|
"loss": 1.8714, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.5084407674244435e-05, |
|
"loss": 1.9025, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.5066983604994021e-05, |
|
"loss": 1.8818, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.5049538813986385e-05, |
|
"loss": 1.887, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.5032073372563118e-05, |
|
"loss": 1.9412, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.501458735215028e-05, |
|
"loss": 1.9445, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.4997080824258084e-05, |
|
"loss": 1.9015, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.49795538604806e-05, |
|
"loss": 1.928, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.496200653249549e-05, |
|
"loss": 1.9313, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.494443891206368e-05, |
|
"loss": 1.9288, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.4926851071029087e-05, |
|
"loss": 1.8663, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.4909243081318335e-05, |
|
"loss": 1.9197, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.4891615014940429e-05, |
|
"loss": 1.8568, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.487396694398649e-05, |
|
"loss": 1.8997, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.4856298940629446e-05, |
|
"loss": 1.9245, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.483861107712374e-05, |
|
"loss": 1.8979, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.4820903425805032e-05, |
|
"loss": 1.904, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.4803176059089905e-05, |
|
"loss": 1.9025, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.4785429049475579e-05, |
|
"loss": 1.8939, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.4767662469539592e-05, |
|
"loss": 1.8951, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.4749876391939526e-05, |
|
"loss": 1.8756, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.4732070889412693e-05, |
|
"loss": 1.8907, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.471424603477585e-05, |
|
"loss": 1.9582, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.469640190092489e-05, |
|
"loss": 1.9328, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.4678538560834552e-05, |
|
"loss": 1.8998, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.4660656087558128e-05, |
|
"loss": 1.9282, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.4642754554227141e-05, |
|
"loss": 1.8567, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.4624834034051072e-05, |
|
"loss": 1.8914, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.4606894600317047e-05, |
|
"loss": 1.9352, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.4588936326389544e-05, |
|
"loss": 1.8902, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.4570959285710088e-05, |
|
"loss": 1.8921, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.4552963551796942e-05, |
|
"loss": 1.9311, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.4534949198244828e-05, |
|
"loss": 1.9264, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.4516916298724607e-05, |
|
"loss": 1.9239, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.4498864926982996e-05, |
|
"loss": 1.8674, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.4480795156842238e-05, |
|
"loss": 1.8841, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.4462707062199834e-05, |
|
"loss": 1.8944, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.4444600717028214e-05, |
|
"loss": 1.8848, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.4426476195374449e-05, |
|
"loss": 1.9242, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.4408333571359943e-05, |
|
"loss": 1.8779, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.4390172919180127e-05, |
|
"loss": 1.8901, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.4371994313104165e-05, |
|
"loss": 1.8814, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.4353797827474643e-05, |
|
"loss": 1.9198, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.4335583536707267e-05, |
|
"loss": 1.8793, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.4317351515290558e-05, |
|
"loss": 1.926, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.4299101837785542e-05, |
|
"loss": 1.8822, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.428083457882546e-05, |
|
"loss": 1.8991, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.426254981311545e-05, |
|
"loss": 1.9236, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.424424761543224e-05, |
|
"loss": 1.8481, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.4225928060623858e-05, |
|
"loss": 1.8402, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.4207591223609298e-05, |
|
"loss": 1.8852, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.4189237179378252e-05, |
|
"loss": 1.8733, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.4170866002990764e-05, |
|
"loss": 1.9203, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.415247776957695e-05, |
|
"loss": 1.8417, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.4134072554336685e-05, |
|
"loss": 1.8471, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.4115650432539281e-05, |
|
"loss": 1.9266, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.4097211479523198e-05, |
|
"loss": 1.8342, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.407875577069573e-05, |
|
"loss": 1.8791, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.4060283381532686e-05, |
|
"loss": 1.8806, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.4041794387578103e-05, |
|
"loss": 1.9552, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.4023288864443915e-05, |
|
"loss": 1.8943, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.4004766887809658e-05, |
|
"loss": 1.923, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.3986228533422151e-05, |
|
"loss": 1.831, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.3967673877095196e-05, |
|
"loss": 1.891, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.3949102994709256e-05, |
|
"loss": 1.8571, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.393051596221116e-05, |
|
"loss": 1.8803, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.3911912855613776e-05, |
|
"loss": 1.9329, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.3893293750995715e-05, |
|
"loss": 1.8711, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.3874658724501013e-05, |
|
"loss": 1.8458, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.3856007852338809e-05, |
|
"loss": 1.8371, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.3837341210783052e-05, |
|
"loss": 1.8411, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.3818658876172189e-05, |
|
"loss": 1.8549, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.3799960924908823e-05, |
|
"loss": 1.8738, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.3781247433459447e-05, |
|
"loss": 1.8871, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.3762518478354086e-05, |
|
"loss": 1.8488, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.374377413618602e-05, |
|
"loss": 1.8801, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.3725014483611443e-05, |
|
"loss": 1.8704, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.3706239597349172e-05, |
|
"loss": 1.8813, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.368744955418032e-05, |
|
"loss": 1.8745, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.3668644430947977e-05, |
|
"loss": 1.8702, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.3649824304556918e-05, |
|
"loss": 1.9195, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.3630989251973263e-05, |
|
"loss": 1.8924, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.3612139350224181e-05, |
|
"loss": 1.8678, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.3593274676397563e-05, |
|
"loss": 1.871, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.3574395307641712e-05, |
|
"loss": 1.9188, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.3555501321165033e-05, |
|
"loss": 1.9325, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.3536592794235696e-05, |
|
"loss": 1.8945, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.3517669804181357e-05, |
|
"loss": 1.9346, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.34987324283888e-05, |
|
"loss": 1.8379, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.3479780744303647e-05, |
|
"loss": 1.8548, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.3460814829430042e-05, |
|
"loss": 1.876, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.3441834761330315e-05, |
|
"loss": 1.8611, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.3422840617624691e-05, |
|
"loss": 1.901, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.3403832475990938e-05, |
|
"loss": 1.8878, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.3384810414164088e-05, |
|
"loss": 1.8961, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.3365774509936097e-05, |
|
"loss": 1.848, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.3346724841155516e-05, |
|
"loss": 1.9287, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.3327661485727204e-05, |
|
"loss": 1.8832, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.3308584521611984e-05, |
|
"loss": 1.8757, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.3289494026826337e-05, |
|
"loss": 1.9066, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.3270390079442082e-05, |
|
"loss": 1.8535, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.3251272757586034e-05, |
|
"loss": 1.9422, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.3232142139439729e-05, |
|
"loss": 1.905, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.3212998303239059e-05, |
|
"loss": 1.9303, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.319384132727399e-05, |
|
"loss": 1.8454, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.3174671289888205e-05, |
|
"loss": 1.8755, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.3155488269478816e-05, |
|
"loss": 1.9152, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.3136292344496026e-05, |
|
"loss": 1.8474, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.3117083593442815e-05, |
|
"loss": 1.8779, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.3097862094874607e-05, |
|
"loss": 1.8939, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.3078627927398968e-05, |
|
"loss": 1.8684, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.3059381169675267e-05, |
|
"loss": 1.8827, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.3040121900414371e-05, |
|
"loss": 1.8655, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.3020850198378299e-05, |
|
"loss": 1.8309, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.300156614237993e-05, |
|
"loss": 1.9122, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.298226981128265e-05, |
|
"loss": 1.9131, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.2962961284000067e-05, |
|
"loss": 1.9138, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.2943640639495639e-05, |
|
"loss": 1.8432, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.2924307956782398e-05, |
|
"loss": 1.9196, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.29049633149226e-05, |
|
"loss": 1.9307, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.2885606793027408e-05, |
|
"loss": 1.8495, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.2866238470256571e-05, |
|
"loss": 1.8663, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.2846858425818097e-05, |
|
"loss": 1.8547, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.2827466738967932e-05, |
|
"loss": 1.9008, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.280806348900964e-05, |
|
"loss": 1.8566, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.2788648755294056e-05, |
|
"loss": 1.8615, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.2769222617218995e-05, |
|
"loss": 1.8961, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.2749785154228904e-05, |
|
"loss": 1.888, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.2730336445814549e-05, |
|
"loss": 1.894, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.2710876571512674e-05, |
|
"loss": 1.8882, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.2691405610905698e-05, |
|
"loss": 1.8688, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.2671923643621376e-05, |
|
"loss": 1.8765, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.2652430749332472e-05, |
|
"loss": 1.868, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.2632927007756438e-05, |
|
"loss": 1.8706, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.2613412498655082e-05, |
|
"loss": 1.8333, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.2593887301834257e-05, |
|
"loss": 1.9108, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.2574351497143522e-05, |
|
"loss": 1.8606, |
|
"step": 1394 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.2554805164475805e-05, |
|
"loss": 1.8601, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.2535248383767102e-05, |
|
"loss": 1.8281, |
|
"step": 1398 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.2515681234996133e-05, |
|
"loss": 1.8664, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.2496103798184019e-05, |
|
"loss": 1.93, |
|
"step": 1402 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.2476516153393954e-05, |
|
"loss": 1.8555, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.2456918380730878e-05, |
|
"loss": 1.8202, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.243731056034115e-05, |
|
"loss": 1.8831, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.2417692772412222e-05, |
|
"loss": 1.8999, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.2398065097172302e-05, |
|
"loss": 1.8463, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.2378427614890041e-05, |
|
"loss": 1.9005, |
|
"step": 1414 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.2358780405874193e-05, |
|
"loss": 1.8984, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.233912355047329e-05, |
|
"loss": 1.8693, |
|
"step": 1418 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.2319457129075314e-05, |
|
"loss": 1.8947, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.229978122210737e-05, |
|
"loss": 1.8737, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.2280095910035343e-05, |
|
"loss": 1.7621, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.22604012733636e-05, |
|
"loss": 1.8208, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.2240697392634631e-05, |
|
"loss": 1.8368, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.2220984348428719e-05, |
|
"loss": 1.8651, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.2201262221363652e-05, |
|
"loss": 1.8629, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.218153109209433e-05, |
|
"loss": 1.8932, |
|
"step": 1434 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.216179104131249e-05, |
|
"loss": 1.8631, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.2142042149746343e-05, |
|
"loss": 1.8515, |
|
"step": 1438 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.2122284498160256e-05, |
|
"loss": 1.7967, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.2102518167354433e-05, |
|
"loss": 1.8777, |
|
"step": 1442 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.2082743238164553e-05, |
|
"loss": 1.8218, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.2062959791461473e-05, |
|
"loss": 1.8485, |
|
"step": 1446 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.2043167908150874e-05, |
|
"loss": 1.8748, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.2023367669172947e-05, |
|
"loss": 1.859, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.2003559155502052e-05, |
|
"loss": 1.9179, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.1983742448146377e-05, |
|
"loss": 1.9111, |
|
"step": 1454 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.1963917628147644e-05, |
|
"loss": 1.8902, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.1944084776580722e-05, |
|
"loss": 1.8747, |
|
"step": 1458 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.1924243974553349e-05, |
|
"loss": 1.9109, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.1904395303205764e-05, |
|
"loss": 1.806, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.1884538843710396e-05, |
|
"loss": 1.8412, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.1864674677271521e-05, |
|
"loss": 1.79, |
|
"step": 1466 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.1844802885124928e-05, |
|
"loss": 1.8406, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.1824923548537602e-05, |
|
"loss": 1.8018, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.1805036748807371e-05, |
|
"loss": 1.799, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.1785142567262591e-05, |
|
"loss": 1.8686, |
|
"step": 1474 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.1765241085261802e-05, |
|
"loss": 1.8628, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.1745332384193408e-05, |
|
"loss": 1.8055, |
|
"step": 1478 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.1725416545475328e-05, |
|
"loss": 1.8962, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.1705493650554667e-05, |
|
"loss": 1.8155, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.16855637809074e-05, |
|
"loss": 1.8189, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.1665627018038013e-05, |
|
"loss": 1.8312, |
|
"step": 1486 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.164568344347919e-05, |
|
"loss": 1.8848, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.1625733138791468e-05, |
|
"loss": 1.8952, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.1605776185562909e-05, |
|
"loss": 1.8726, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.1585812665408764e-05, |
|
"loss": 1.8264, |
|
"step": 1494 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.156584265997114e-05, |
|
"loss": 1.8375, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.1545866250918667e-05, |
|
"loss": 1.8172, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.1525883519946163e-05, |
|
"loss": 1.8732, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.1505894548774294e-05, |
|
"loss": 1.8296, |
|
"step": 1502 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.148589941914926e-05, |
|
"loss": 1.9029, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.1465898212842426e-05, |
|
"loss": 1.8767, |
|
"step": 1506 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.1445891011650025e-05, |
|
"loss": 1.8337, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.1425877897392799e-05, |
|
"loss": 1.8418, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.1405858951915676e-05, |
|
"loss": 1.8653, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.1385834257087427e-05, |
|
"loss": 1.822, |
|
"step": 1514 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.1365803894800334e-05, |
|
"loss": 1.8383, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.1345767946969866e-05, |
|
"loss": 1.8838, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.132572649553432e-05, |
|
"loss": 1.817, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.1305679622454511e-05, |
|
"loss": 1.8163, |
|
"step": 1522 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.1285627409713424e-05, |
|
"loss": 1.8337, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.1265569939315882e-05, |
|
"loss": 1.8499, |
|
"step": 1526 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.1245507293288204e-05, |
|
"loss": 1.8354, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.1225439553677881e-05, |
|
"loss": 1.7944, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.1205366802553231e-05, |
|
"loss": 1.8707, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.1185289122003071e-05, |
|
"loss": 1.839, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.1165206594136371e-05, |
|
"loss": 1.8798, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.114511930108193e-05, |
|
"loss": 1.8741, |
|
"step": 1538 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.1125027324988029e-05, |
|
"loss": 1.8993, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.1104930748022109e-05, |
|
"loss": 1.8224, |
|
"step": 1542 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.1084829652370417e-05, |
|
"loss": 1.8658, |
|
"step": 1544 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.1064724120237687e-05, |
|
"loss": 1.8583, |
|
"step": 1546 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.104461423384679e-05, |
|
"loss": 1.8516, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.1024500075438414e-05, |
|
"loss": 1.893, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.1004381727270704e-05, |
|
"loss": 1.8721, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.0984259271618947e-05, |
|
"loss": 1.8654, |
|
"step": 1554 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.0964132790775231e-05, |
|
"loss": 1.9032, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.0944002367048097e-05, |
|
"loss": 1.8805, |
|
"step": 1558 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.0923868082762217e-05, |
|
"loss": 1.8381, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.0903730020258052e-05, |
|
"loss": 1.8339, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.0883588261891507e-05, |
|
"loss": 1.8639, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.0863442890033608e-05, |
|
"loss": 1.7967, |
|
"step": 1566 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.0843293987070154e-05, |
|
"loss": 1.8535, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.0823141635401388e-05, |
|
"loss": 1.8319, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.0802985917441657e-05, |
|
"loss": 1.8606, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.0782826915619074e-05, |
|
"loss": 1.8677, |
|
"step": 1574 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.0762664712375179e-05, |
|
"loss": 1.8294, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.0742499390164609e-05, |
|
"loss": 1.9059, |
|
"step": 1578 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.0722331031454749e-05, |
|
"loss": 1.8348, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.0702159718725413e-05, |
|
"loss": 1.8599, |
|
"step": 1582 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.0681985534468484e-05, |
|
"loss": 1.814, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.0661808561187597e-05, |
|
"loss": 1.9452, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.0641628881397785e-05, |
|
"loss": 1.7966, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.0621446577625154e-05, |
|
"loss": 1.9034, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.060126173240655e-05, |
|
"loss": 1.831, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.0581074428289193e-05, |
|
"loss": 1.8329, |
|
"step": 1594 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.0560884747830375e-05, |
|
"loss": 1.8224, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.0540692773597097e-05, |
|
"loss": 1.8928, |
|
"step": 1598 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.0520498588165746e-05, |
|
"loss": 1.8735, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.0500302274121748e-05, |
|
"loss": 1.8609, |
|
"step": 1602 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.0480103914059235e-05, |
|
"loss": 1.812, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.0459903590580706e-05, |
|
"loss": 1.8607, |
|
"step": 1606 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.0439701386296696e-05, |
|
"loss": 1.8484, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.0419497383825425e-05, |
|
"loss": 1.8574, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.0399291665792463e-05, |
|
"loss": 1.8783, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.0379084314830405e-05, |
|
"loss": 1.8025, |
|
"step": 1614 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.0358875413578524e-05, |
|
"loss": 1.8968, |
|
"step": 1616 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.0338665044682418e-05, |
|
"loss": 1.8557, |
|
"step": 1618 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.0318453290793706e-05, |
|
"loss": 1.8275, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.0298240234569661e-05, |
|
"loss": 1.7935, |
|
"step": 1622 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.0278025958672886e-05, |
|
"loss": 1.7812, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.0257810545770966e-05, |
|
"loss": 1.7503, |
|
"step": 1626 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.0237594078536141e-05, |
|
"loss": 1.7857, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.0217376639644964e-05, |
|
"loss": 1.8114, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.0197158311777957e-05, |
|
"loss": 1.7967, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.0176939177619283e-05, |
|
"loss": 1.7743, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.015671931985639e-05, |
|
"loss": 1.8089, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.0136498821179704e-05, |
|
"loss": 1.821, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.011627776428226e-05, |
|
"loss": 1.8334, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.0096056231859375e-05, |
|
"loss": 1.8699, |
|
"step": 1642 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.007583430660832e-05, |
|
"loss": 1.7575, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.0055612071227958e-05, |
|
"loss": 1.8264, |
|
"step": 1646 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.0035389608418435e-05, |
|
"loss": 1.8277, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.001516700088082e-05, |
|
"loss": 1.7808, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.994944331316771e-06, |
|
"loss": 1.8691, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.974721682428208e-06, |
|
"loss": 1.7907, |
|
"step": 1654 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.95449913691696e-06, |
|
"loss": 1.8094, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.934276777484436e-06, |
|
"loss": 1.8144, |
|
"step": 1658 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.914054686831281e-06, |
|
"loss": 1.8336, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.893832947657042e-06, |
|
"loss": 1.7941, |
|
"step": 1662 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.873611642659833e-06, |
|
"loss": 1.802, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.853390854535988e-06, |
|
"loss": 1.8163, |
|
"step": 1666 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.833170665979725e-06, |
|
"loss": 1.8184, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.812951159682817e-06, |
|
"loss": 1.8055, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.792732418334243e-06, |
|
"loss": 1.8084, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.772514524619846e-06, |
|
"loss": 1.8102, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.752297561222023e-06, |
|
"loss": 1.8164, |
|
"step": 1676 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.732081610819346e-06, |
|
"loss": 1.8212, |
|
"step": 1678 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.711866756086252e-06, |
|
"loss": 1.7861, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.691653079692694e-06, |
|
"loss": 1.8109, |
|
"step": 1682 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.671440664303813e-06, |
|
"loss": 1.8495, |
|
"step": 1684 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.651229592579596e-06, |
|
"loss": 1.8491, |
|
"step": 1686 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 9.631019947174514e-06, |
|
"loss": 1.852, |
|
"step": 1688 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 9.610811810737231e-06, |
|
"loss": 1.8011, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 9.590605265910225e-06, |
|
"loss": 1.7655, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 9.570400395329466e-06, |
|
"loss": 1.8085, |
|
"step": 1694 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 9.550197281624078e-06, |
|
"loss": 1.8162, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 9.529996007416007e-06, |
|
"loss": 1.7833, |
|
"step": 1698 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 9.509796655319665e-06, |
|
"loss": 1.7894, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 9.489599307941608e-06, |
|
"loss": 1.8012, |
|
"step": 1702 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.469404047880205e-06, |
|
"loss": 1.7753, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.44921095772527e-06, |
|
"loss": 1.7456, |
|
"step": 1706 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.429020120057747e-06, |
|
"loss": 1.815, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.408831617449385e-06, |
|
"loss": 1.8138, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.388645532462366e-06, |
|
"loss": 1.8053, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 9.368461947648986e-06, |
|
"loss": 1.7459, |
|
"step": 1714 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 9.348280945551324e-06, |
|
"loss": 1.813, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 9.328102608700895e-06, |
|
"loss": 1.788, |
|
"step": 1718 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 9.307927019618313e-06, |
|
"loss": 1.7766, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.287754260812949e-06, |
|
"loss": 1.7857, |
|
"step": 1722 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.267584414782614e-06, |
|
"loss": 1.7624, |
|
"step": 1724 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.247417564013192e-06, |
|
"loss": 1.7824, |
|
"step": 1726 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.227253790978326e-06, |
|
"loss": 1.8102, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 9.207093178139067e-06, |
|
"loss": 1.7498, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 9.186935807943546e-06, |
|
"loss": 1.7743, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 9.16678176282664e-06, |
|
"loss": 1.7482, |
|
"step": 1734 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 9.146631125209608e-06, |
|
"loss": 1.7707, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 9.126483977499797e-06, |
|
"loss": 1.8034, |
|
"step": 1738 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 9.106340402090266e-06, |
|
"loss": 1.8073, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 9.08620048135947e-06, |
|
"loss": 1.8022, |
|
"step": 1742 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 9.066064297670914e-06, |
|
"loss": 1.7745, |
|
"step": 1744 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 9.04593193337283e-06, |
|
"loss": 1.8555, |
|
"step": 1746 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 9.025803470797823e-06, |
|
"loss": 1.8339, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 9.005678992262535e-06, |
|
"loss": 1.7979, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.985558580067337e-06, |
|
"loss": 1.7941, |
|
"step": 1752 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.965442316495945e-06, |
|
"loss": 1.8046, |
|
"step": 1754 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.94533028381512e-06, |
|
"loss": 1.7796, |
|
"step": 1756 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.92522256427433e-06, |
|
"loss": 1.7516, |
|
"step": 1758 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.905119240105386e-06, |
|
"loss": 1.833, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.885020393522136e-06, |
|
"loss": 1.8348, |
|
"step": 1762 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.86492610672011e-06, |
|
"loss": 1.8329, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.8448364618762e-06, |
|
"loss": 1.8099, |
|
"step": 1766 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.824751541148305e-06, |
|
"loss": 1.8097, |
|
"step": 1768 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.804671426675003e-06, |
|
"loss": 1.7472, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.78459620057523e-06, |
|
"loss": 1.8142, |
|
"step": 1772 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.764525944947915e-06, |
|
"loss": 1.8346, |
|
"step": 1774 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.74446074187167e-06, |
|
"loss": 1.7788, |
|
"step": 1776 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 8.724400673404438e-06, |
|
"loss": 1.8045, |
|
"step": 1778 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 8.704345821583169e-06, |
|
"loss": 1.775, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 8.684296268423477e-06, |
|
"loss": 1.7687, |
|
"step": 1782 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 8.664252095919303e-06, |
|
"loss": 1.7898, |
|
"step": 1784 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 8.644213386042594e-06, |
|
"loss": 1.8123, |
|
"step": 1786 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 8.624180220742945e-06, |
|
"loss": 1.8162, |
|
"step": 1788 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 8.60415268194728e-06, |
|
"loss": 1.8055, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 8.58413085155952e-06, |
|
"loss": 1.8186, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 8.564114811460233e-06, |
|
"loss": 1.8004, |
|
"step": 1794 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 8.544104643506308e-06, |
|
"loss": 1.793, |
|
"step": 1796 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 8.524100429530621e-06, |
|
"loss": 1.823, |
|
"step": 1798 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 8.504102251341704e-06, |
|
"loss": 1.8422, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 8.484110190723396e-06, |
|
"loss": 1.8116, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 8.464124329434522e-06, |
|
"loss": 1.7723, |
|
"step": 1804 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 8.444144749208558e-06, |
|
"loss": 1.8069, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 8.424171531753288e-06, |
|
"loss": 1.8267, |
|
"step": 1808 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 8.404204758750475e-06, |
|
"loss": 1.7934, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 8.38424451185553e-06, |
|
"loss": 1.721, |
|
"step": 1812 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 8.364290872697175e-06, |
|
"loss": 1.7947, |
|
"step": 1814 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 8.344343922877107e-06, |
|
"loss": 1.8914, |
|
"step": 1816 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8.324403743969666e-06, |
|
"loss": 1.8247, |
|
"step": 1818 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8.30447041752151e-06, |
|
"loss": 1.7762, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8.28454402505126e-06, |
|
"loss": 1.7987, |
|
"step": 1822 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8.264624648049188e-06, |
|
"loss": 1.7829, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 8.244712367976878e-06, |
|
"loss": 1.8121, |
|
"step": 1826 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 8.224807266266883e-06, |
|
"loss": 1.7393, |
|
"step": 1828 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 8.20490942432241e-06, |
|
"loss": 1.7823, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 8.185018923516963e-06, |
|
"loss": 1.7875, |
|
"step": 1832 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 8.16513584519404e-06, |
|
"loss": 1.7663, |
|
"step": 1834 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 8.145260270666775e-06, |
|
"loss": 1.7447, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 8.125392281217605e-06, |
|
"loss": 1.807, |
|
"step": 1838 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 8.105531958097973e-06, |
|
"loss": 1.8077, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 8.085679382527945e-06, |
|
"loss": 1.8029, |
|
"step": 1842 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 8.06583463569592e-06, |
|
"loss": 1.7999, |
|
"step": 1844 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 8.045997798758263e-06, |
|
"loss": 1.8458, |
|
"step": 1846 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 8.026168952839014e-06, |
|
"loss": 1.7925, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 8.006348179029517e-06, |
|
"loss": 1.7963, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 7.986535558388103e-06, |
|
"loss": 1.8324, |
|
"step": 1852 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 7.966731171939776e-06, |
|
"loss": 1.8655, |
|
"step": 1854 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 7.946935100675848e-06, |
|
"loss": 1.7805, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 7.927147425553635e-06, |
|
"loss": 1.7398, |
|
"step": 1858 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 7.907368227496111e-06, |
|
"loss": 1.853, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 7.887597587391591e-06, |
|
"loss": 1.8018, |
|
"step": 1862 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 7.86783558609339e-06, |
|
"loss": 1.836, |
|
"step": 1864 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 7.848082304419478e-06, |
|
"loss": 1.7692, |
|
"step": 1866 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 7.82833782315219e-06, |
|
"loss": 1.8082, |
|
"step": 1868 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 7.808602223037855e-06, |
|
"loss": 1.8154, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 7.788875584786484e-06, |
|
"loss": 1.816, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 7.769157989071447e-06, |
|
"loss": 1.8358, |
|
"step": 1874 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 7.74944951652912e-06, |
|
"loss": 1.843, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 7.729750247758582e-06, |
|
"loss": 1.7787, |
|
"step": 1878 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 7.710060263321259e-06, |
|
"loss": 1.8051, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 7.690379643740628e-06, |
|
"loss": 1.8016, |
|
"step": 1882 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 7.670708469501848e-06, |
|
"loss": 1.8164, |
|
"step": 1884 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 7.651046821051454e-06, |
|
"loss": 1.808, |
|
"step": 1886 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 7.631394778797042e-06, |
|
"loss": 1.7887, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 7.6117524231068985e-06, |
|
"loss": 1.7793, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 7.5921198343097145e-06, |
|
"loss": 1.8733, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 7.5724970926942265e-06, |
|
"loss": 1.7316, |
|
"step": 1894 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 7.552884278508913e-06, |
|
"loss": 1.7447, |
|
"step": 1896 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 7.533281471961642e-06, |
|
"loss": 1.7504, |
|
"step": 1898 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 7.523483846606048e-06, |
|
"loss": 1.8538, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 7.5038962018159845e-06, |
|
"loss": 1.8364, |
|
"step": 1902 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 7.484318765003867e-06, |
|
"loss": 1.8108, |
|
"step": 1904 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 7.464751616232902e-06, |
|
"loss": 1.7535, |
|
"step": 1906 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 7.445194835524198e-06, |
|
"loss": 1.7906, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 7.425648502856483e-06, |
|
"loss": 1.7392, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 7.406112698165742e-06, |
|
"loss": 1.7712, |
|
"step": 1912 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 7.3865875013449195e-06, |
|
"loss": 1.8206, |
|
"step": 1914 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 7.367072992243569e-06, |
|
"loss": 1.793, |
|
"step": 1916 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 7.34756925066753e-06, |
|
"loss": 1.7805, |
|
"step": 1918 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 7.328076356378626e-06, |
|
"loss": 1.7996, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 7.308594389094306e-06, |
|
"loss": 1.784, |
|
"step": 1922 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 7.28912342848733e-06, |
|
"loss": 1.8146, |
|
"step": 1924 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 7.269663554185455e-06, |
|
"loss": 1.7956, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 7.2502148457711e-06, |
|
"loss": 1.8095, |
|
"step": 1928 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 7.230777382781012e-06, |
|
"loss": 1.7846, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 7.211351244705947e-06, |
|
"loss": 1.7649, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 7.191936510990365e-06, |
|
"loss": 1.7949, |
|
"step": 1934 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 7.172533261032069e-06, |
|
"loss": 1.817, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 7.153141574181903e-06, |
|
"loss": 1.7536, |
|
"step": 1938 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 7.133761529743432e-06, |
|
"loss": 1.8015, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 7.1143932069725956e-06, |
|
"loss": 1.7432, |
|
"step": 1942 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 7.095036685077404e-06, |
|
"loss": 1.8241, |
|
"step": 1944 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 7.0756920432176035e-06, |
|
"loss": 1.769, |
|
"step": 1946 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 7.056359360504363e-06, |
|
"loss": 1.7899, |
|
"step": 1948 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 7.037038715999939e-06, |
|
"loss": 1.7825, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 7.017730188717348e-06, |
|
"loss": 1.7613, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 6.998433857620075e-06, |
|
"loss": 1.8186, |
|
"step": 1954 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 6.979149801621703e-06, |
|
"loss": 1.8042, |
|
"step": 1956 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 6.959878099585634e-06, |
|
"loss": 1.8133, |
|
"step": 1958 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 6.940618830324732e-06, |
|
"loss": 1.7957, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 6.921372072601035e-06, |
|
"loss": 1.8154, |
|
"step": 1962 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 6.902137905125397e-06, |
|
"loss": 1.8523, |
|
"step": 1964 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 6.882916406557188e-06, |
|
"loss": 1.8621, |
|
"step": 1966 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 6.863707655503975e-06, |
|
"loss": 1.8588, |
|
"step": 1968 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 6.844511730521186e-06, |
|
"loss": 1.7631, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 6.825328710111801e-06, |
|
"loss": 1.8152, |
|
"step": 1972 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 6.806158672726013e-06, |
|
"loss": 1.8447, |
|
"step": 1974 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 6.787001696760942e-06, |
|
"loss": 1.7771, |
|
"step": 1976 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 6.767857860560276e-06, |
|
"loss": 1.8188, |
|
"step": 1978 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 6.748727242413966e-06, |
|
"loss": 1.8101, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 6.729609920557922e-06, |
|
"loss": 1.782, |
|
"step": 1982 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 6.7105059731736645e-06, |
|
"loss": 1.748, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 6.691415478388016e-06, |
|
"loss": 1.7672, |
|
"step": 1986 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 6.672338514272801e-06, |
|
"loss": 1.8024, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 6.653275158844488e-06, |
|
"loss": 1.7669, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 6.634225490063909e-06, |
|
"loss": 1.8115, |
|
"step": 1992 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 6.615189585835912e-06, |
|
"loss": 1.785, |
|
"step": 1994 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 6.596167524009064e-06, |
|
"loss": 1.7757, |
|
"step": 1996 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 6.577159382375316e-06, |
|
"loss": 1.8101, |
|
"step": 1998 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 6.558165238669685e-06, |
|
"loss": 1.754, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 6.539185170569962e-06, |
|
"loss": 1.7901, |
|
"step": 2002 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 6.520219255696356e-06, |
|
"loss": 1.7702, |
|
"step": 2004 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 6.501267571611207e-06, |
|
"loss": 1.7951, |
|
"step": 2006 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 6.482330195818646e-06, |
|
"loss": 1.8218, |
|
"step": 2008 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 6.4634072057643045e-06, |
|
"loss": 1.7576, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 6.444498678834974e-06, |
|
"loss": 1.7987, |
|
"step": 2012 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 6.4256046923582895e-06, |
|
"loss": 1.7833, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 6.406725323602441e-06, |
|
"loss": 1.7606, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 6.387860649775822e-06, |
|
"loss": 1.7676, |
|
"step": 2018 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 6.369010748026739e-06, |
|
"loss": 1.7803, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 6.350175695443085e-06, |
|
"loss": 1.7635, |
|
"step": 2022 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 6.331355569052027e-06, |
|
"loss": 1.807, |
|
"step": 2024 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 6.312550445819687e-06, |
|
"loss": 1.7618, |
|
"step": 2026 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 6.2937604026508295e-06, |
|
"loss": 1.8215, |
|
"step": 2028 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 6.27498551638856e-06, |
|
"loss": 1.7923, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 6.256225863813985e-06, |
|
"loss": 1.7972, |
|
"step": 2032 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 6.237481521645915e-06, |
|
"loss": 1.801, |
|
"step": 2034 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 6.218752566540555e-06, |
|
"loss": 1.7799, |
|
"step": 2036 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 6.2000390750911775e-06, |
|
"loss": 1.8538, |
|
"step": 2038 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 6.181341123827816e-06, |
|
"loss": 1.7324, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 6.162658789216946e-06, |
|
"loss": 1.7931, |
|
"step": 2042 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 6.143992147661195e-06, |
|
"loss": 1.7487, |
|
"step": 2044 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 6.1253412754989926e-06, |
|
"loss": 1.7697, |
|
"step": 2046 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 6.106706249004284e-06, |
|
"loss": 1.7764, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 6.088087144386225e-06, |
|
"loss": 1.7744, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 6.069484037788844e-06, |
|
"loss": 1.7781, |
|
"step": 2052 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 6.050897005290749e-06, |
|
"loss": 1.8226, |
|
"step": 2054 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 6.032326122904808e-06, |
|
"loss": 1.8304, |
|
"step": 2056 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 6.013771466577851e-06, |
|
"loss": 1.7915, |
|
"step": 2058 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 5.9952331121903466e-06, |
|
"loss": 1.813, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 5.976711135556086e-06, |
|
"loss": 1.8069, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 5.9582056124219e-06, |
|
"loss": 1.8052, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 5.939716618467317e-06, |
|
"loss": 1.8314, |
|
"step": 2066 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 5.921244229304275e-06, |
|
"loss": 1.7603, |
|
"step": 2068 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 5.9027885204768045e-06, |
|
"loss": 1.7968, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 5.884349567460723e-06, |
|
"loss": 1.7951, |
|
"step": 2072 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 5.8659274456633195e-06, |
|
"loss": 1.7493, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 5.8475222304230505e-06, |
|
"loss": 1.8222, |
|
"step": 2076 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 5.829133997009238e-06, |
|
"loss": 1.8119, |
|
"step": 2078 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 5.8107628206217516e-06, |
|
"loss": 1.7982, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 5.792408776390701e-06, |
|
"loss": 1.7449, |
|
"step": 2082 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 5.774071939376146e-06, |
|
"loss": 1.8028, |
|
"step": 2084 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 5.755752384567762e-06, |
|
"loss": 1.7828, |
|
"step": 2086 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 5.737450186884555e-06, |
|
"loss": 1.7213, |
|
"step": 2088 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 5.7191654211745405e-06, |
|
"loss": 1.7802, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 5.700898162214461e-06, |
|
"loss": 1.8378, |
|
"step": 2092 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 5.682648484709447e-06, |
|
"loss": 1.8132, |
|
"step": 2094 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 5.664416463292734e-06, |
|
"loss": 1.7297, |
|
"step": 2096 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 5.646202172525359e-06, |
|
"loss": 1.7344, |
|
"step": 2098 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 5.62800568689584e-06, |
|
"loss": 1.8234, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 5.609827080819876e-06, |
|
"loss": 1.8401, |
|
"step": 2102 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 5.591666428640062e-06, |
|
"loss": 1.8273, |
|
"step": 2104 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 5.573523804625551e-06, |
|
"loss": 1.7594, |
|
"step": 2106 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 5.555399282971787e-06, |
|
"loss": 1.7938, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 5.537292937800165e-06, |
|
"loss": 1.7991, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 5.519204843157762e-06, |
|
"loss": 1.8119, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 5.501135073017008e-06, |
|
"loss": 1.8301, |
|
"step": 2114 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 5.483083701275391e-06, |
|
"loss": 1.7656, |
|
"step": 2116 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 5.465050801755174e-06, |
|
"loss": 1.8065, |
|
"step": 2118 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 5.447036448203062e-06, |
|
"loss": 1.8037, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.4290407142899175e-06, |
|
"loss": 1.7335, |
|
"step": 2122 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.4110636736104545e-06, |
|
"loss": 1.7776, |
|
"step": 2124 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.393105399682954e-06, |
|
"loss": 1.7953, |
|
"step": 2126 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.3751659659489334e-06, |
|
"loss": 1.7409, |
|
"step": 2128 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.357245445772863e-06, |
|
"loss": 1.7813, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.339343912441877e-06, |
|
"loss": 1.7938, |
|
"step": 2132 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.321461439165452e-06, |
|
"loss": 1.8118, |
|
"step": 2134 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.3035980990751135e-06, |
|
"loss": 1.7505, |
|
"step": 2136 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.285753965224154e-06, |
|
"loss": 1.7165, |
|
"step": 2138 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.267929110587308e-06, |
|
"loss": 1.7894, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.250123608060476e-06, |
|
"loss": 1.8054, |
|
"step": 2142 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.2323375304604076e-06, |
|
"loss": 1.7548, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.2145709505244225e-06, |
|
"loss": 1.7672, |
|
"step": 2146 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.196823940910096e-06, |
|
"loss": 1.7785, |
|
"step": 2148 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.17909657419497e-06, |
|
"loss": 1.8292, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.161388922876263e-06, |
|
"loss": 1.7949, |
|
"step": 2152 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.143701059370556e-06, |
|
"loss": 1.7908, |
|
"step": 2154 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.126033056013513e-06, |
|
"loss": 1.7925, |
|
"step": 2156 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.108384985059572e-06, |
|
"loss": 1.742, |
|
"step": 2158 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.090756918681669e-06, |
|
"loss": 1.7872, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.073148928970917e-06, |
|
"loss": 1.7558, |
|
"step": 2162 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.055561087936325e-06, |
|
"loss": 1.7815, |
|
"step": 2164 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.037993467504515e-06, |
|
"loss": 1.8125, |
|
"step": 2166 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.020446139519404e-06, |
|
"loss": 1.8043, |
|
"step": 2168 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.0029191757419185e-06, |
|
"loss": 1.7691, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.985412647849721e-06, |
|
"loss": 1.7858, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.967926627436882e-06, |
|
"loss": 1.7839, |
|
"step": 2174 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.9504611860136185e-06, |
|
"loss": 1.8227, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.933016395005979e-06, |
|
"loss": 1.7703, |
|
"step": 2178 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.915592325755569e-06, |
|
"loss": 1.8115, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.898189049519243e-06, |
|
"loss": 1.782, |
|
"step": 2182 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.880806637468828e-06, |
|
"loss": 1.7804, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.863445160690815e-06, |
|
"loss": 1.8148, |
|
"step": 2186 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.846104690186097e-06, |
|
"loss": 1.7726, |
|
"step": 2188 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.828785296869646e-06, |
|
"loss": 1.7901, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.811487051570235e-06, |
|
"loss": 1.765, |
|
"step": 2192 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.794210025030167e-06, |
|
"loss": 1.8182, |
|
"step": 2194 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.776954287904955e-06, |
|
"loss": 1.8019, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.759719910763049e-06, |
|
"loss": 1.7577, |
|
"step": 2198 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.742506964085555e-06, |
|
"loss": 1.7665, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.725315518265926e-06, |
|
"loss": 1.7769, |
|
"step": 2202 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.7081456436097e-06, |
|
"loss": 1.7785, |
|
"step": 2204 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.69099741033418e-06, |
|
"loss": 1.7803, |
|
"step": 2206 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.673870888568185e-06, |
|
"loss": 1.781, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.656766148351729e-06, |
|
"loss": 1.7455, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.63968325963575e-06, |
|
"loss": 1.7833, |
|
"step": 2212 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.6226222922818345e-06, |
|
"loss": 1.7523, |
|
"step": 2214 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.6055833160619076e-06, |
|
"loss": 1.7976, |
|
"step": 2216 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.588566400657965e-06, |
|
"loss": 1.749, |
|
"step": 2218 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.57157161566178e-06, |
|
"loss": 1.77, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.55459903057463e-06, |
|
"loss": 1.8299, |
|
"step": 2222 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.5376487148069995e-06, |
|
"loss": 1.7764, |
|
"step": 2224 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.5207207376782954e-06, |
|
"loss": 1.7518, |
|
"step": 2226 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.503815168416584e-06, |
|
"loss": 1.7597, |
|
"step": 2228 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.486932076158279e-06, |
|
"loss": 1.8031, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.470071529947877e-06, |
|
"loss": 1.7573, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 4.453233598737678e-06, |
|
"loss": 1.7651, |
|
"step": 2234 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 4.436418351387483e-06, |
|
"loss": 1.7526, |
|
"step": 2236 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 4.419625856664342e-06, |
|
"loss": 1.7442, |
|
"step": 2238 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 4.402856183242241e-06, |
|
"loss": 1.7393, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 4.386109399701853e-06, |
|
"loss": 1.7378, |
|
"step": 2242 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 4.369385574530227e-06, |
|
"loss": 1.8041, |
|
"step": 2244 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 4.352684776120525e-06, |
|
"loss": 1.7883, |
|
"step": 2246 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 4.336007072771749e-06, |
|
"loss": 1.7932, |
|
"step": 2248 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 4.319352532688444e-06, |
|
"loss": 1.771, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 4.302721223980426e-06, |
|
"loss": 1.7801, |
|
"step": 2252 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 4.286113214662507e-06, |
|
"loss": 1.7642, |
|
"step": 2254 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 4.269528572654221e-06, |
|
"loss": 1.7942, |
|
"step": 2256 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.252967365779532e-06, |
|
"loss": 1.806, |
|
"step": 2258 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.236429661766562e-06, |
|
"loss": 1.7692, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.219915528247331e-06, |
|
"loss": 1.7478, |
|
"step": 2262 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.203425032757449e-06, |
|
"loss": 1.7524, |
|
"step": 2264 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.186958242735861e-06, |
|
"loss": 1.8303, |
|
"step": 2266 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.1705152255245774e-06, |
|
"loss": 1.7769, |
|
"step": 2268 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.15409604836838e-06, |
|
"loss": 1.7617, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.137700778414555e-06, |
|
"loss": 1.8643, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 4.121329482712615e-06, |
|
"loss": 1.7643, |
|
"step": 2274 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 4.104982228214039e-06, |
|
"loss": 1.7848, |
|
"step": 2276 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 4.0886590817719795e-06, |
|
"loss": 1.8095, |
|
"step": 2278 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 4.072360110140996e-06, |
|
"loss": 1.8052, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 4.056085379976794e-06, |
|
"loss": 1.7737, |
|
"step": 2282 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 4.039834957835933e-06, |
|
"loss": 1.8049, |
|
"step": 2284 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 4.023608910175564e-06, |
|
"loss": 1.8058, |
|
"step": 2286 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 4.007407303353156e-06, |
|
"loss": 1.7713, |
|
"step": 2288 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 3.991230203626234e-06, |
|
"loss": 1.7632, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 3.97507767715209e-06, |
|
"loss": 1.7341, |
|
"step": 2292 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 3.9589497899875265e-06, |
|
"loss": 1.8128, |
|
"step": 2294 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 3.942846608088583e-06, |
|
"loss": 1.7962, |
|
"step": 2296 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 3.926768197310259e-06, |
|
"loss": 1.7576, |
|
"step": 2298 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 3.910714623406263e-06, |
|
"loss": 1.7766, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 3.894685952028716e-06, |
|
"loss": 1.843, |
|
"step": 2302 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 3.8786822487279145e-06, |
|
"loss": 1.8207, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 3.862703578952034e-06, |
|
"loss": 1.7656, |
|
"step": 2306 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 3.846750008046875e-06, |
|
"loss": 1.729, |
|
"step": 2308 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 3.830821601255603e-06, |
|
"loss": 1.7607, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 3.814918423718467e-06, |
|
"loss": 1.7865, |
|
"step": 2312 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.799040540472536e-06, |
|
"loss": 1.8241, |
|
"step": 2314 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.7831880164514467e-06, |
|
"loss": 1.7874, |
|
"step": 2316 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.7673609164851197e-06, |
|
"loss": 1.8109, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.7515593052995027e-06, |
|
"loss": 1.8103, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.735783247516305e-06, |
|
"loss": 1.7593, |
|
"step": 2322 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.72003280765274e-06, |
|
"loss": 1.7942, |
|
"step": 2324 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.704308050121248e-06, |
|
"loss": 1.77, |
|
"step": 2326 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.6886090392292397e-06, |
|
"loss": 1.7753, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.672935839178842e-06, |
|
"loss": 1.7744, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.6572885140666125e-06, |
|
"loss": 1.7915, |
|
"step": 2332 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.6416671278833072e-06, |
|
"loss": 1.7754, |
|
"step": 2334 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.6260717445135886e-06, |
|
"loss": 1.7285, |
|
"step": 2336 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 3.6105024277357925e-06, |
|
"loss": 1.7752, |
|
"step": 2338 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 3.5949592412216437e-06, |
|
"loss": 1.7785, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 3.5794422485360058e-06, |
|
"loss": 1.7382, |
|
"step": 2342 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 3.5639515131366297e-06, |
|
"loss": 1.7592, |
|
"step": 2344 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 3.5484870983738774e-06, |
|
"loss": 1.7759, |
|
"step": 2346 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 3.5330490674904737e-06, |
|
"loss": 1.7923, |
|
"step": 2348 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 3.517637483621241e-06, |
|
"loss": 1.7296, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 3.5022524097928546e-06, |
|
"loss": 1.7618, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 3.4868939089235666e-06, |
|
"loss": 1.7458, |
|
"step": 2354 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 3.471562043822957e-06, |
|
"loss": 1.8129, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 3.456256877191684e-06, |
|
"loss": 1.7321, |
|
"step": 2358 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 3.4409784716212124e-06, |
|
"loss": 1.773, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.425726889593577e-06, |
|
"loss": 1.8245, |
|
"step": 2362 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.4105021934811e-06, |
|
"loss": 1.7942, |
|
"step": 2364 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.3953044455461705e-06, |
|
"loss": 1.7436, |
|
"step": 2366 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.3801337079409566e-06, |
|
"loss": 1.8017, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.364990042707168e-06, |
|
"loss": 1.773, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.3498735117758107e-06, |
|
"loss": 1.7625, |
|
"step": 2372 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.334784176966912e-06, |
|
"loss": 1.7515, |
|
"step": 2374 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.3197220999892785e-06, |
|
"loss": 1.7847, |
|
"step": 2376 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 3.304687342440257e-06, |
|
"loss": 1.7762, |
|
"step": 2378 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 3.289679965805457e-06, |
|
"loss": 1.7914, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 3.274700031458514e-06, |
|
"loss": 1.771, |
|
"step": 2382 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 3.2597476006608388e-06, |
|
"loss": 1.7516, |
|
"step": 2384 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.244822734561368e-06, |
|
"loss": 1.7604, |
|
"step": 2386 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.2299254941963055e-06, |
|
"loss": 1.7139, |
|
"step": 2388 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.215055940488875e-06, |
|
"loss": 1.7367, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.2002141342490854e-06, |
|
"loss": 1.7927, |
|
"step": 2392 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 3.1854001361734564e-06, |
|
"loss": 1.6907, |
|
"step": 2394 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 3.170614006844799e-06, |
|
"loss": 1.7805, |
|
"step": 2396 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 3.155855806731938e-06, |
|
"loss": 1.783, |
|
"step": 2398 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 3.141125596189494e-06, |
|
"loss": 1.7553, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.126423435457614e-06, |
|
"loss": 1.7994, |
|
"step": 2402 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.11174938466173e-06, |
|
"loss": 1.7779, |
|
"step": 2404 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.0971035038123297e-06, |
|
"loss": 1.7561, |
|
"step": 2406 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.0824858528046873e-06, |
|
"loss": 1.7267, |
|
"step": 2408 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.0678964914186282e-06, |
|
"loss": 1.7596, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.053335479318297e-06, |
|
"loss": 1.7735, |
|
"step": 2412 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.038802876051891e-06, |
|
"loss": 1.8203, |
|
"step": 2414 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.024298741051429e-06, |
|
"loss": 1.7813, |
|
"step": 2416 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 3.00982313363251e-06, |
|
"loss": 1.7504, |
|
"step": 2418 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 2.9953761129940706e-06, |
|
"loss": 1.7908, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 2.9809577382181344e-06, |
|
"loss": 1.7508, |
|
"step": 2422 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 2.966568068269574e-06, |
|
"loss": 1.7996, |
|
"step": 2424 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 2.952207161995879e-06, |
|
"loss": 1.8341, |
|
"step": 2426 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 2.937875078126907e-06, |
|
"loss": 1.7176, |
|
"step": 2428 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 2.92357187527464e-06, |
|
"loss": 1.689, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 2.9092976119329485e-06, |
|
"loss": 1.7535, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 2.8950523464773604e-06, |
|
"loss": 1.8065, |
|
"step": 2434 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 2.8808361371648073e-06, |
|
"loss": 1.7231, |
|
"step": 2436 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 2.866649042133396e-06, |
|
"loss": 1.7789, |
|
"step": 2438 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 2.852491119402172e-06, |
|
"loss": 1.6767, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 2.8383624268708766e-06, |
|
"loss": 1.6767, |
|
"step": 2442 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 2.8242630223197064e-06, |
|
"loss": 1.7596, |
|
"step": 2444 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 2.8101929634090964e-06, |
|
"loss": 1.7839, |
|
"step": 2446 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 2.7961523076794584e-06, |
|
"loss": 1.7519, |
|
"step": 2448 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 2.782141112550961e-06, |
|
"loss": 1.7686, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 2.7681594353232934e-06, |
|
"loss": 1.7489, |
|
"step": 2452 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 2.7542073331754316e-06, |
|
"loss": 1.7261, |
|
"step": 2454 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 2.7402848631653956e-06, |
|
"loss": 1.745, |
|
"step": 2456 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 2.726392082230034e-06, |
|
"loss": 1.7932, |
|
"step": 2458 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 2.7125290471847653e-06, |
|
"loss": 1.7539, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 2.6986958147233754e-06, |
|
"loss": 1.758, |
|
"step": 2462 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 2.684892441417759e-06, |
|
"loss": 1.6835, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 2.671118983717702e-06, |
|
"loss": 1.7887, |
|
"step": 2466 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 2.6573754979506574e-06, |
|
"loss": 1.763, |
|
"step": 2468 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 2.6436620403214953e-06, |
|
"loss": 1.7728, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 2.629978666912284e-06, |
|
"loss": 1.7607, |
|
"step": 2472 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 2.616325433682072e-06, |
|
"loss": 1.7733, |
|
"step": 2474 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 2.6027023964666354e-06, |
|
"loss": 1.7926, |
|
"step": 2476 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 2.5891096109782644e-06, |
|
"loss": 1.7717, |
|
"step": 2478 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 2.5755471328055394e-06, |
|
"loss": 1.8207, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 2.56201501741309e-06, |
|
"loss": 1.7626, |
|
"step": 2482 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 2.548513320141377e-06, |
|
"loss": 1.7431, |
|
"step": 2484 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 2.5350420962064614e-06, |
|
"loss": 1.7708, |
|
"step": 2486 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 2.5216014006997925e-06, |
|
"loss": 1.7373, |
|
"step": 2488 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 2.5081912885879558e-06, |
|
"loss": 1.7781, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 2.49481181471248e-06, |
|
"loss": 1.6714, |
|
"step": 2492 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 2.4814630337895816e-06, |
|
"loss": 1.7549, |
|
"step": 2494 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 2.4681450004099715e-06, |
|
"loss": 1.7998, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 2.4548577690386044e-06, |
|
"loss": 1.7495, |
|
"step": 2498 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 2.44160139401447e-06, |
|
"loss": 1.7551, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 2.428375929550377e-06, |
|
"loss": 1.7441, |
|
"step": 2502 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 2.4151814297327157e-06, |
|
"loss": 1.7314, |
|
"step": 2504 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 2.4020179485212437e-06, |
|
"loss": 1.7881, |
|
"step": 2506 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 2.388885539748873e-06, |
|
"loss": 1.7789, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 2.3757842571214384e-06, |
|
"loss": 1.7356, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 2.36271415421748e-06, |
|
"loss": 1.7548, |
|
"step": 2512 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 2.349675284488029e-06, |
|
"loss": 1.7691, |
|
"step": 2514 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 2.336667701256391e-06, |
|
"loss": 1.7612, |
|
"step": 2516 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 2.323691457717916e-06, |
|
"loss": 1.6773, |
|
"step": 2518 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 2.3107466069397886e-06, |
|
"loss": 1.7832, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 2.297833201860816e-06, |
|
"loss": 1.7523, |
|
"step": 2522 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 2.284951295291208e-06, |
|
"loss": 1.7636, |
|
"step": 2524 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 2.272100939912347e-06, |
|
"loss": 1.6904, |
|
"step": 2526 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 2.2592821882766e-06, |
|
"loss": 1.7878, |
|
"step": 2528 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 2.246495092807077e-06, |
|
"loss": 1.7404, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 2.2337397057974343e-06, |
|
"loss": 1.7535, |
|
"step": 2532 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 2.2210160794116466e-06, |
|
"loss": 1.7139, |
|
"step": 2534 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 2.2083242656838134e-06, |
|
"loss": 1.7012, |
|
"step": 2536 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 2.195664316517926e-06, |
|
"loss": 1.7612, |
|
"step": 2538 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 2.1830362836876617e-06, |
|
"loss": 1.8057, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 2.170440218836184e-06, |
|
"loss": 1.7806, |
|
"step": 2542 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 2.1578761734759122e-06, |
|
"loss": 1.7413, |
|
"step": 2544 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 2.1453441989883215e-06, |
|
"loss": 1.7482, |
|
"step": 2546 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 2.132844346623731e-06, |
|
"loss": 1.7893, |
|
"step": 2548 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 2.1203766675011007e-06, |
|
"loss": 1.7371, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 2.1079412126078035e-06, |
|
"loss": 1.753, |
|
"step": 2552 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 2.0955380327994445e-06, |
|
"loss": 1.7221, |
|
"step": 2554 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 2.083167178799623e-06, |
|
"loss": 1.7094, |
|
"step": 2556 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 2.0708287011997528e-06, |
|
"loss": 1.7715, |
|
"step": 2558 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 2.0585226504588306e-06, |
|
"loss": 1.7701, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 2.0462490769032528e-06, |
|
"loss": 1.697, |
|
"step": 2562 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 2.0340080307265887e-06, |
|
"loss": 1.6981, |
|
"step": 2564 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 2.0217995619893894e-06, |
|
"loss": 1.7085, |
|
"step": 2566 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 2.009623720618974e-06, |
|
"loss": 1.7761, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.9974805564092403e-06, |
|
"loss": 1.748, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.9853701190204387e-06, |
|
"loss": 1.6954, |
|
"step": 2572 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.9732924579789857e-06, |
|
"loss": 1.7337, |
|
"step": 2574 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.9612476226772627e-06, |
|
"loss": 1.7605, |
|
"step": 2576 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.9492356623733987e-06, |
|
"loss": 1.7695, |
|
"step": 2578 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.937256626191083e-06, |
|
"loss": 1.78, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.925310563119358e-06, |
|
"loss": 1.7368, |
|
"step": 2582 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.9133975220124246e-06, |
|
"loss": 1.7892, |
|
"step": 2584 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.9015175515894303e-06, |
|
"loss": 1.75, |
|
"step": 2586 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.8896707004342851e-06, |
|
"loss": 1.7678, |
|
"step": 2588 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.8778570169954568e-06, |
|
"loss": 1.7973, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.8660765495857648e-06, |
|
"loss": 1.7518, |
|
"step": 2592 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.8543293463821922e-06, |
|
"loss": 1.7464, |
|
"step": 2594 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.8426154554256836e-06, |
|
"loss": 1.7284, |
|
"step": 2596 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.8309349246209607e-06, |
|
"loss": 1.7154, |
|
"step": 2598 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.8192878017363048e-06, |
|
"loss": 1.7579, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.8076741344033777e-06, |
|
"loss": 1.7239, |
|
"step": 2602 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1.7960939701170278e-06, |
|
"loss": 1.8123, |
|
"step": 2604 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1.7845473562350835e-06, |
|
"loss": 1.7921, |
|
"step": 2606 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1.773034339978167e-06, |
|
"loss": 1.7595, |
|
"step": 2608 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1.7615549684295074e-06, |
|
"loss": 1.7653, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.7501092885347349e-06, |
|
"loss": 1.7557, |
|
"step": 2612 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.7386973471016954e-06, |
|
"loss": 1.7247, |
|
"step": 2614 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.7273191908002663e-06, |
|
"loss": 1.7685, |
|
"step": 2616 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.7159748661621501e-06, |
|
"loss": 1.7214, |
|
"step": 2618 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 1.7046644195806995e-06, |
|
"loss": 1.7555, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 1.6933878973107133e-06, |
|
"loss": 1.7323, |
|
"step": 2622 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 1.6821453454682635e-06, |
|
"loss": 1.7416, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 1.6709368100304911e-06, |
|
"loss": 1.7537, |
|
"step": 2626 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.6597623368354277e-06, |
|
"loss": 1.8397, |
|
"step": 2628 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.6486219715817998e-06, |
|
"loss": 1.7253, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.6375157598288572e-06, |
|
"loss": 1.7509, |
|
"step": 2632 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.6264437469961703e-06, |
|
"loss": 1.7857, |
|
"step": 2634 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 1.615405978363447e-06, |
|
"loss": 1.7102, |
|
"step": 2636 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 1.6044024990703634e-06, |
|
"loss": 1.7563, |
|
"step": 2638 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 1.593433354116356e-06, |
|
"loss": 1.786, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 1.5824985883604526e-06, |
|
"loss": 1.7106, |
|
"step": 2642 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.5715982465210844e-06, |
|
"loss": 1.7562, |
|
"step": 2644 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.560732373175907e-06, |
|
"loss": 1.7084, |
|
"step": 2646 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.5499010127616087e-06, |
|
"loss": 1.6984, |
|
"step": 2648 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.539104209573743e-06, |
|
"loss": 1.7293, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 1.5283420077665312e-06, |
|
"loss": 1.7559, |
|
"step": 2652 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 1.517614451352697e-06, |
|
"loss": 1.7396, |
|
"step": 2654 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 1.5069215842032725e-06, |
|
"loss": 1.7581, |
|
"step": 2656 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 1.4962634500474338e-06, |
|
"loss": 1.7492, |
|
"step": 2658 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.485640092472308e-06, |
|
"loss": 1.7579, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.475051554922804e-06, |
|
"loss": 1.793, |
|
"step": 2662 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.4644978807014276e-06, |
|
"loss": 1.7413, |
|
"step": 2664 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.4539791129681157e-06, |
|
"loss": 1.7035, |
|
"step": 2666 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.4434952947400505e-06, |
|
"loss": 1.6727, |
|
"step": 2668 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.4330464688914792e-06, |
|
"loss": 1.7254, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.422632678153557e-06, |
|
"loss": 1.7443, |
|
"step": 2672 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.412253965114152e-06, |
|
"loss": 1.767, |
|
"step": 2674 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 1.401910372217684e-06, |
|
"loss": 1.7036, |
|
"step": 2676 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 1.3916019417649418e-06, |
|
"loss": 1.7526, |
|
"step": 2678 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 1.3813287159129208e-06, |
|
"loss": 1.7227, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 1.371090736674644e-06, |
|
"loss": 1.7712, |
|
"step": 2682 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.3608880459189877e-06, |
|
"loss": 1.719, |
|
"step": 2684 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.3507206853705178e-06, |
|
"loss": 1.8072, |
|
"step": 2686 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.340588696609313e-06, |
|
"loss": 1.7315, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.3304921210707922e-06, |
|
"loss": 1.6884, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.3204310000455612e-06, |
|
"loss": 1.7803, |
|
"step": 2692 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.31040537467922e-06, |
|
"loss": 1.7541, |
|
"step": 2694 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.3004152859722152e-06, |
|
"loss": 1.7513, |
|
"step": 2696 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.2904607747796561e-06, |
|
"loss": 1.7111, |
|
"step": 2698 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.2805418818111658e-06, |
|
"loss": 1.811, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.2706586476306971e-06, |
|
"loss": 1.7671, |
|
"step": 2702 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.2608111126563715e-06, |
|
"loss": 1.7514, |
|
"step": 2704 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.2509993171603263e-06, |
|
"loss": 1.7736, |
|
"step": 2706 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.2412233012685315e-06, |
|
"loss": 1.7376, |
|
"step": 2708 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.2314831049606325e-06, |
|
"loss": 1.7316, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.221778768069799e-06, |
|
"loss": 1.7132, |
|
"step": 2712 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.2121103302825388e-06, |
|
"loss": 1.7622, |
|
"step": 2714 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.2024778311385588e-06, |
|
"loss": 1.7271, |
|
"step": 2716 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.1928813100305826e-06, |
|
"loss": 1.6945, |
|
"step": 2718 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.1833208062042078e-06, |
|
"loss": 1.7107, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.1737963587577318e-06, |
|
"loss": 1.7716, |
|
"step": 2722 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.1643080066419977e-06, |
|
"loss": 1.721, |
|
"step": 2724 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.154855788660234e-06, |
|
"loss": 1.7474, |
|
"step": 2726 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.1454397434679022e-06, |
|
"loss": 1.6985, |
|
"step": 2728 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.1360599095725243e-06, |
|
"loss": 1.7498, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.1267163253335378e-06, |
|
"loss": 1.7129, |
|
"step": 2732 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.1174090289621386e-06, |
|
"loss": 1.756, |
|
"step": 2734 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.1081380585211133e-06, |
|
"loss": 1.7459, |
|
"step": 2736 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.0989034519246956e-06, |
|
"loss": 1.7708, |
|
"step": 2738 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1.0897052469384095e-06, |
|
"loss": 1.6919, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1.0805434811789073e-06, |
|
"loss": 1.7396, |
|
"step": 2742 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1.071418192113821e-06, |
|
"loss": 1.7501, |
|
"step": 2744 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1.0623294170616128e-06, |
|
"loss": 1.7437, |
|
"step": 2746 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.0532771931914177e-06, |
|
"loss": 1.765, |
|
"step": 2748 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.0442615575228875e-06, |
|
"loss": 1.7283, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.0352825469260485e-06, |
|
"loss": 1.7591, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.0263401981211475e-06, |
|
"loss": 1.7487, |
|
"step": 2754 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.0174345476784963e-06, |
|
"loss": 1.7649, |
|
"step": 2756 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.008565632018328e-06, |
|
"loss": 1.7493, |
|
"step": 2758 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 9.997334874106468e-07, |
|
"loss": 1.7429, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 9.909381499750824e-07, |
|
"loss": 1.743, |
|
"step": 2762 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 9.82179655680734e-07, |
|
"loss": 1.7327, |
|
"step": 2764 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 9.734580403460281e-07, |
|
"loss": 1.7447, |
|
"step": 2766 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 9.647733396385794e-07, |
|
"loss": 1.7381, |
|
"step": 2768 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 9.56125589075032e-07, |
|
"loss": 1.7667, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 9.475148240209175e-07, |
|
"loss": 1.7485, |
|
"step": 2772 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 9.389410796905229e-07, |
|
"loss": 1.7752, |
|
"step": 2774 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 9.304043911467242e-07, |
|
"loss": 1.7736, |
|
"step": 2776 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 9.219047933008662e-07, |
|
"loss": 1.7809, |
|
"step": 2778 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 9.134423209125998e-07, |
|
"loss": 1.7512, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 9.05017008589758e-07, |
|
"loss": 1.7496, |
|
"step": 2782 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 8.966288907881981e-07, |
|
"loss": 1.7285, |
|
"step": 2784 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 8.882780018116688e-07, |
|
"loss": 1.7315, |
|
"step": 2786 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 8.799643758116739e-07, |
|
"loss": 1.7887, |
|
"step": 2788 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 8.716880467873235e-07, |
|
"loss": 1.7173, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 8.634490485851998e-07, |
|
"loss": 1.7744, |
|
"step": 2792 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 8.552474148992174e-07, |
|
"loss": 1.7322, |
|
"step": 2794 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 8.470831792704925e-07, |
|
"loss": 1.7163, |
|
"step": 2796 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 8.389563750871921e-07, |
|
"loss": 1.7543, |
|
"step": 2798 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 8.308670355844051e-07, |
|
"loss": 1.7267, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 8.228151938440132e-07, |
|
"loss": 1.7467, |
|
"step": 2802 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 8.148008827945431e-07, |
|
"loss": 1.7355, |
|
"step": 2804 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 8.06824135211034e-07, |
|
"loss": 1.7533, |
|
"step": 2806 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 7.98884983714917e-07, |
|
"loss": 1.8103, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 7.90983460773862e-07, |
|
"loss": 1.8002, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 7.831195987016604e-07, |
|
"loss": 1.7631, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 7.752934296580816e-07, |
|
"loss": 1.7571, |
|
"step": 2814 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 7.675049856487549e-07, |
|
"loss": 1.7618, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 7.597542985250228e-07, |
|
"loss": 1.7473, |
|
"step": 2818 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 7.520413999838205e-07, |
|
"loss": 1.7714, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 7.44366321567549e-07, |
|
"loss": 1.7322, |
|
"step": 2822 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 7.36729094663936e-07, |
|
"loss": 1.7755, |
|
"step": 2824 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 7.291297505059158e-07, |
|
"loss": 1.7062, |
|
"step": 2826 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 7.215683201714951e-07, |
|
"loss": 1.7667, |
|
"step": 2828 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 7.14044834583637e-07, |
|
"loss": 1.7912, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 7.065593245101188e-07, |
|
"loss": 1.6748, |
|
"step": 2832 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 6.991118205634184e-07, |
|
"loss": 1.8296, |
|
"step": 2834 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 6.917023532005885e-07, |
|
"loss": 1.6914, |
|
"step": 2836 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 6.843309527231212e-07, |
|
"loss": 1.7107, |
|
"step": 2838 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 6.76997649276836e-07, |
|
"loss": 1.7289, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 6.697024728517531e-07, |
|
"loss": 1.7092, |
|
"step": 2842 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 6.624454532819702e-07, |
|
"loss": 1.7089, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 6.552266202455348e-07, |
|
"loss": 1.7465, |
|
"step": 2846 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 6.480460032643321e-07, |
|
"loss": 1.7386, |
|
"step": 2848 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 6.409036317039619e-07, |
|
"loss": 1.7095, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 6.337995347736137e-07, |
|
"loss": 1.8104, |
|
"step": 2852 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 6.2673374152595e-07, |
|
"loss": 1.76, |
|
"step": 2854 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 6.197062808569909e-07, |
|
"loss": 1.7191, |
|
"step": 2856 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 6.127171815059918e-07, |
|
"loss": 1.7726, |
|
"step": 2858 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 6.057664720553258e-07, |
|
"loss": 1.7279, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 5.988541809303671e-07, |
|
"loss": 1.7625, |
|
"step": 2862 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 5.919803363993815e-07, |
|
"loss": 1.7329, |
|
"step": 2864 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 5.851449665733977e-07, |
|
"loss": 1.7362, |
|
"step": 2866 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 5.783480994061019e-07, |
|
"loss": 1.7445, |
|
"step": 2868 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 5.715897626937261e-07, |
|
"loss": 1.816, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 5.648699840749205e-07, |
|
"loss": 1.7441, |
|
"step": 2872 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 5.581887910306594e-07, |
|
"loss": 1.7526, |
|
"step": 2874 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 5.515462108841107e-07, |
|
"loss": 1.7149, |
|
"step": 2876 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 5.44942270800537e-07, |
|
"loss": 1.6937, |
|
"step": 2878 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 5.383769977871778e-07, |
|
"loss": 1.748, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 5.318504186931416e-07, |
|
"loss": 1.7413, |
|
"step": 2882 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 5.253625602092971e-07, |
|
"loss": 1.7593, |
|
"step": 2884 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 5.189134488681602e-07, |
|
"loss": 1.7053, |
|
"step": 2886 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 5.125031110437883e-07, |
|
"loss": 1.7037, |
|
"step": 2888 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 5.061315729516736e-07, |
|
"loss": 1.6846, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 4.997988606486336e-07, |
|
"loss": 1.7661, |
|
"step": 2892 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 4.935050000327046e-07, |
|
"loss": 1.7705, |
|
"step": 2894 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 4.87250016843035e-07, |
|
"loss": 1.7635, |
|
"step": 2896 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 4.81033936659786e-07, |
|
"loss": 1.7525, |
|
"step": 2898 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 4.7485678490401755e-07, |
|
"loss": 1.7974, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 4.6871858683759206e-07, |
|
"loss": 1.6898, |
|
"step": 2902 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 4.6261936756306746e-07, |
|
"loss": 1.7686, |
|
"step": 2904 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 4.565591520235957e-07, |
|
"loss": 1.7554, |
|
"step": 2906 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 4.5053796500282076e-07, |
|
"loss": 1.7925, |
|
"step": 2908 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 4.445558311247755e-07, |
|
"loss": 1.738, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 4.3861277485378384e-07, |
|
"loss": 1.778, |
|
"step": 2912 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 4.327088204943597e-07, |
|
"loss": 1.7361, |
|
"step": 2914 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 4.2684399219110493e-07, |
|
"loss": 1.7336, |
|
"step": 2916 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 4.2101831392861505e-07, |
|
"loss": 1.7825, |
|
"step": 2918 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 4.1523180953137785e-07, |
|
"loss": 1.7401, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 4.094845026636773e-07, |
|
"loss": 1.7292, |
|
"step": 2922 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 4.0377641682949667e-07, |
|
"loss": 1.7585, |
|
"step": 2924 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 3.9810757537242175e-07, |
|
"loss": 1.7571, |
|
"step": 2926 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 3.9247800147554805e-07, |
|
"loss": 1.7688, |
|
"step": 2928 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 3.868877181613806e-07, |
|
"loss": 1.7733, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 3.8133674829174515e-07, |
|
"loss": 1.7647, |
|
"step": 2932 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 3.7582511456769165e-07, |
|
"loss": 1.7717, |
|
"step": 2934 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 3.703528395294043e-07, |
|
"loss": 1.7208, |
|
"step": 2936 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 3.6491994555610257e-07, |
|
"loss": 1.6513, |
|
"step": 2938 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 3.595264548659616e-07, |
|
"loss": 1.7232, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 3.5417238951600986e-07, |
|
"loss": 1.7238, |
|
"step": 2942 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 3.4885777140204025e-07, |
|
"loss": 1.7453, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 3.4358262225853255e-07, |
|
"loss": 1.7241, |
|
"step": 2946 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 3.383469636585468e-07, |
|
"loss": 1.8228, |
|
"step": 2948 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 3.331508170136477e-07, |
|
"loss": 1.6952, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 3.2799420357381486e-07, |
|
"loss": 1.7492, |
|
"step": 2952 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 3.2287714442735264e-07, |
|
"loss": 1.6934, |
|
"step": 2954 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 3.177996605008038e-07, |
|
"loss": 1.7871, |
|
"step": 2956 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 3.1276177255886606e-07, |
|
"loss": 1.7564, |
|
"step": 2958 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 3.0776350120431233e-07, |
|
"loss": 1.7099, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 3.028048668778938e-07, |
|
"loss": 1.7299, |
|
"step": 2962 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 2.978858898582659e-07, |
|
"loss": 1.7445, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 2.9300659026190504e-07, |
|
"loss": 1.7694, |
|
"step": 2966 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 2.8816698804302043e-07, |
|
"loss": 1.7813, |
|
"step": 2968 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 2.8336710299348034e-07, |
|
"loss": 1.7317, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.786069547427239e-07, |
|
"loss": 1.7423, |
|
"step": 2972 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.738865627576881e-07, |
|
"loss": 1.7888, |
|
"step": 2974 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.692059463427177e-07, |
|
"loss": 1.7366, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.645651246394976e-07, |
|
"loss": 1.7844, |
|
"step": 2978 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 2.599641166269684e-07, |
|
"loss": 1.6804, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 2.5540294112125107e-07, |
|
"loss": 1.76, |
|
"step": 2982 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 2.508816167755668e-07, |
|
"loss": 1.7709, |
|
"step": 2984 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 2.464001620801637e-07, |
|
"loss": 1.7451, |
|
"step": 2986 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 2.4195859536224165e-07, |
|
"loss": 1.7944, |
|
"step": 2988 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 2.3755693478587416e-07, |
|
"loss": 1.6974, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 2.331951983519365e-07, |
|
"loss": 1.7314, |
|
"step": 2992 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 2.2887340389803338e-07, |
|
"loss": 1.7597, |
|
"step": 2994 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 2.245915690984224e-07, |
|
"loss": 1.7064, |
|
"step": 2996 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 2.2034971146394302e-07, |
|
"loss": 1.7006, |
|
"step": 2998 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 2.1614784834194658e-07, |
|
"loss": 1.7599, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 2.1198599691622634e-07, |
|
"loss": 1.7349, |
|
"step": 3002 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 2.0786417420693982e-07, |
|
"loss": 1.721, |
|
"step": 3004 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 2.0378239707054882e-07, |
|
"loss": 1.7761, |
|
"step": 3006 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 1.9974068219974607e-07, |
|
"loss": 1.7402, |
|
"step": 3008 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 1.9573904612338545e-07, |
|
"loss": 1.7401, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 1.9177750520641525e-07, |
|
"loss": 1.7541, |
|
"step": 3012 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 1.878560756498149e-07, |
|
"loss": 1.6704, |
|
"step": 3014 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 1.8397477349052395e-07, |
|
"loss": 1.776, |
|
"step": 3016 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 1.801336146013777e-07, |
|
"loss": 1.7534, |
|
"step": 3018 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.7633261469104378e-07, |
|
"loss": 1.7372, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.7257178930395912e-07, |
|
"loss": 1.7403, |
|
"step": 3022 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.6885115382026084e-07, |
|
"loss": 1.8037, |
|
"step": 3024 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.651707234557287e-07, |
|
"loss": 1.7416, |
|
"step": 3026 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 1.6153051326172063e-07, |
|
"loss": 1.7563, |
|
"step": 3028 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 1.5793053812511172e-07, |
|
"loss": 1.7859, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 1.5437081276823417e-07, |
|
"loss": 1.7562, |
|
"step": 3032 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 1.5085135174881416e-07, |
|
"loss": 1.7266, |
|
"step": 3034 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.4737216945991505e-07, |
|
"loss": 1.7662, |
|
"step": 3036 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.4393328012987872e-07, |
|
"loss": 1.8232, |
|
"step": 3038 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.4053469782226437e-07, |
|
"loss": 1.7156, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.3717643643579525e-07, |
|
"loss": 1.7574, |
|
"step": 3042 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 1.3385850970429882e-07, |
|
"loss": 1.7577, |
|
"step": 3044 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 1.3058093119664882e-07, |
|
"loss": 1.7602, |
|
"step": 3046 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 1.2734371431671777e-07, |
|
"loss": 1.7145, |
|
"step": 3048 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 1.2414687230331124e-07, |
|
"loss": 1.7752, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 1.2099041823012136e-07, |
|
"loss": 1.7904, |
|
"step": 3052 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 1.178743650056724e-07, |
|
"loss": 1.8119, |
|
"step": 3054 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 1.147987253732652e-07, |
|
"loss": 1.7413, |
|
"step": 3056 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 1.1176351191092727e-07, |
|
"loss": 1.7737, |
|
"step": 3058 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 1.0876873703135949e-07, |
|
"loss": 1.7112, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 1.0581441298188944e-07, |
|
"loss": 1.7561, |
|
"step": 3062 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 1.0290055184441372e-07, |
|
"loss": 1.7256, |
|
"step": 3064 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 1.0002716553535685e-07, |
|
"loss": 1.728, |
|
"step": 3066 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 9.719426580561908e-08, |
|
"loss": 1.7338, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 9.440186424052755e-08, |
|
"loss": 1.7488, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 9.164997225978745e-08, |
|
"loss": 1.7515, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 8.893860111743868e-08, |
|
"loss": 1.7867, |
|
"step": 3074 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 8.62677619018104e-08, |
|
"loss": 1.7451, |
|
"step": 3076 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 8.363746553547214e-08, |
|
"loss": 1.744, |
|
"step": 3078 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 8.104772277519047e-08, |
|
"loss": 1.7599, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 7.849854421188574e-08, |
|
"loss": 1.7123, |
|
"step": 3082 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 7.598994027058992e-08, |
|
"loss": 1.7468, |
|
"step": 3084 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 7.352192121039992e-08, |
|
"loss": 1.6844, |
|
"step": 3086 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 7.109449712443873e-08, |
|
"loss": 1.7591, |
|
"step": 3088 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 6.870767793981658e-08, |
|
"loss": 1.7483, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 6.636147341758215e-08, |
|
"loss": 1.7963, |
|
"step": 3092 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 6.405589315269246e-08, |
|
"loss": 1.7774, |
|
"step": 3094 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 6.179094657396634e-08, |
|
"loss": 1.7685, |
|
"step": 3096 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 5.9566642944050015e-08, |
|
"loss": 1.7408, |
|
"step": 3098 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 5.7382991359375975e-08, |
|
"loss": 1.7275, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 5.5240000750129695e-08, |
|
"loss": 1.7765, |
|
"step": 3102 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 5.313767988020857e-08, |
|
"loss": 1.7012, |
|
"step": 3104 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 5.107603734719191e-08, |
|
"loss": 1.6892, |
|
"step": 3106 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 4.905508158229877e-08, |
|
"loss": 1.7605, |
|
"step": 3108 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 4.7074820850357974e-08, |
|
"loss": 1.7936, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 4.513526324977591e-08, |
|
"loss": 1.7406, |
|
"step": 3112 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 4.3236416712496565e-08, |
|
"loss": 1.69, |
|
"step": 3114 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 4.1378289003977115e-08, |
|
"loss": 1.7447, |
|
"step": 3116 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 3.956088772315014e-08, |
|
"loss": 1.7381, |
|
"step": 3118 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 3.7784220302397036e-08, |
|
"loss": 1.7742, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 3.604829400751242e-08, |
|
"loss": 1.7281, |
|
"step": 3122 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 3.435311593768087e-08, |
|
"loss": 1.782, |
|
"step": 3124 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 3.2698693025441374e-08, |
|
"loss": 1.7599, |
|
"step": 3126 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 3.108503203666402e-08, |
|
"loss": 1.7887, |
|
"step": 3128 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 2.9512139570520016e-08, |
|
"loss": 1.7642, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 2.7980022059453938e-08, |
|
"loss": 1.776, |
|
"step": 3132 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 2.6488685769161528e-08, |
|
"loss": 1.7862, |
|
"step": 3134 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 2.5038136798556377e-08, |
|
"loss": 1.8244, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 2.3628381079754403e-08, |
|
"loss": 1.7425, |
|
"step": 3138 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 2.2259424378041628e-08, |
|
"loss": 1.7029, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 2.093127229185532e-08, |
|
"loss": 1.7273, |
|
"step": 3142 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 1.9643930252760678e-08, |
|
"loss": 1.7155, |
|
"step": 3144 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 1.8397403525424184e-08, |
|
"loss": 1.7583, |
|
"step": 3146 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 1.7789447503233638e-08, |
|
"loss": 1.7538, |
|
"step": 3148 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 1.6604153249651923e-08, |
|
"loss": 1.7628, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 1.54596867391954e-08, |
|
"loss": 1.7237, |
|
"step": 3152 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 1.4356052652231277e-08, |
|
"loss": 1.8494, |
|
"step": 3154 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 1.3293255502144776e-08, |
|
"loss": 1.7827, |
|
"step": 3156 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 1.2271299635311373e-08, |
|
"loss": 1.7227, |
|
"step": 3158 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 1.1290189231087934e-08, |
|
"loss": 1.7628, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 1.034992830178716e-08, |
|
"loss": 1.7185, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 9.450520692667609e-09, |
|
"loss": 1.7533, |
|
"step": 3164 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 8.591970081914813e-09, |
|
"loss": 1.7421, |
|
"step": 3166 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 7.774279980626853e-09, |
|
"loss": 1.8076, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 6.9974537328010295e-09, |
|
"loss": 1.777, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 6.261494515317212e-09, |
|
"loss": 1.6956, |
|
"step": 3172 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 5.566405337930069e-09, |
|
"loss": 1.7326, |
|
"step": 3174 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 4.912189043250193e-09, |
|
"loss": 1.7357, |
|
"step": 3176 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 4.2988483067374355e-09, |
|
"loss": 1.7927, |
|
"step": 3178 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 3.726385636689811e-09, |
|
"loss": 1.6958, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 3.1948033742290606e-09, |
|
"loss": 1.7589, |
|
"step": 3182 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 2.7041036932962117e-09, |
|
"loss": 1.7557, |
|
"step": 3184 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 2.2542886006382547e-09, |
|
"loss": 1.7444, |
|
"step": 3186 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 1.8453599358048136e-09, |
|
"loss": 1.7866, |
|
"step": 3188 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 1.477319371133712e-09, |
|
"loss": 1.768, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 1.1501684117531941e-09, |
|
"loss": 1.7777, |
|
"step": 3192 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 8.639083955663819e-10, |
|
"loss": 1.7533, |
|
"step": 3194 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 6.185404932523841e-10, |
|
"loss": 1.7906, |
|
"step": 3196 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 4.140657082607469e-10, |
|
"loss": 1.6971, |
|
"step": 3198 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 2.504848768025703e-10, |
|
"loss": 1.7139, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 1.2779866785161966e-10, |
|
"loss": 1.7268, |
|
"step": 3202 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600758314321496e-11, |
|
"loss": 1.744, |
|
"step": 3204 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 3204, |
|
"total_flos": 1.1937678110176051e+17, |
|
"train_loss": 1.9303735846586143, |
|
"train_runtime": 18412.5115, |
|
"train_samples_per_second": 11.148, |
|
"train_steps_per_second": 0.174 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 3204, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 1000, |
|
"total_flos": 1.1937678110176051e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|