|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9944341372912802, |
|
"eval_steps": 800, |
|
"global_step": 4300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0, |
|
"loss": 2.4801, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0, |
|
"loss": 2.4284, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0, |
|
"loss": 2.2651, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0, |
|
"loss": 2.411, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0, |
|
"loss": 2.8299, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0, |
|
"loss": 2.2188, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.3082402064781276e-06, |
|
"loss": 1.345, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9623603097171917e-06, |
|
"loss": 0.5695, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.262883767531511e-06, |
|
"loss": 0.8812, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.5555756797431724e-06, |
|
"loss": 0.8725, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.7786547836457785e-06, |
|
"loss": 0.4851, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9589528137043157e-06, |
|
"loss": 0.8354, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.110267503805303e-06, |
|
"loss": 0.4873, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.2406394020168525e-06, |
|
"loss": 0.6584, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.3551671365864186e-06, |
|
"loss": 0.529, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.4572878450621517e-06, |
|
"loss": 0.5812, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.5494288615482305e-06, |
|
"loss": 0.5163, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.6333682331099297e-06, |
|
"loss": 0.6595, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.710447450306277e-06, |
|
"loss": 0.4775, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.7817036126729157e-06, |
|
"loss": 0.682, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.84795507876713e-06, |
|
"loss": 0.7048, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.909858960648549e-06, |
|
"loss": 0.9478, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.9679508875196075e-06, |
|
"loss": 0.5932, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.022673220704539e-06, |
|
"loss": 0.5733, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.074395524884577e-06, |
|
"loss": 0.5817, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.123429713794031e-06, |
|
"loss": 0.6372, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.170041450985754e-06, |
|
"loss": 0.5108, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.214458864668026e-06, |
|
"loss": 0.6262, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.256879301905398e-06, |
|
"loss": 0.8594, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.297474628787183e-06, |
|
"loss": 0.7241, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.336395436735046e-06, |
|
"loss": 0.6471, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.373774415149143e-06, |
|
"loss": 0.6144, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.409729081127459e-06, |
|
"loss": 0.672, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.444364007946065e-06, |
|
"loss": 0.6802, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.4777726588457195e-06, |
|
"loss": 0.514, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.510038907149524e-06, |
|
"loss": 0.715, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.541238304971202e-06, |
|
"loss": 0.5189, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.5714391488166745e-06, |
|
"loss": 0.7188, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.600703379889684e-06, |
|
"loss": 0.5829, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.629087348946707e-06, |
|
"loss": 0.5551, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.656642469442713e-06, |
|
"loss": 0.7016, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.683415777991895e-06, |
|
"loss": 0.5357, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.709450417491796e-06, |
|
"loss": 0.6232, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.734786055373451e-06, |
|
"loss": 0.7218, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.759459247158257e-06, |
|
"loss": 0.51, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.783503753685794e-06, |
|
"loss": 0.8871, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.806950818921448e-06, |
|
"loss": 0.8177, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.8298294140798465e-06, |
|
"loss": 0.5602, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.852166452849314e-06, |
|
"loss": 0.7395, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.8739869817278244e-06, |
|
"loss": 0.7008, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.89531434884623e-06, |
|
"loss": 0.7096, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.916170354132174e-06, |
|
"loss": 0.7617, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.936575383236021e-06, |
|
"loss": 0.637, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.956548527281403e-06, |
|
"loss": 0.7149, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.976107690203556e-06, |
|
"loss": 0.585, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.995269685187989e-06, |
|
"loss": 0.4153, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998404594767071e-06, |
|
"loss": 0.8215, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.995213784301213e-06, |
|
"loss": 0.6051, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.992022973835355e-06, |
|
"loss": 0.6785, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.988832163369496e-06, |
|
"loss": 0.6774, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.985641352903638e-06, |
|
"loss": 0.5039, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.982450542437779e-06, |
|
"loss": 0.5724, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.979259731971921e-06, |
|
"loss": 0.8469, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.976068921506063e-06, |
|
"loss": 0.9735, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.972878111040205e-06, |
|
"loss": 0.8594, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.969687300574346e-06, |
|
"loss": 0.6523, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9664964901084875e-06, |
|
"loss": 0.6641, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.96330567964263e-06, |
|
"loss": 0.6586, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.960114869176771e-06, |
|
"loss": 0.6019, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.956924058710913e-06, |
|
"loss": 0.7117, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.953733248245054e-06, |
|
"loss": 0.4632, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.950542437779196e-06, |
|
"loss": 0.733, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.947351627313338e-06, |
|
"loss": 0.7205, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9441608168474795e-06, |
|
"loss": 0.6121, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.940970006381621e-06, |
|
"loss": 0.7859, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9377791959157625e-06, |
|
"loss": 0.8041, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.934588385449905e-06, |
|
"loss": 0.7834, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.931397574984046e-06, |
|
"loss": 0.6704, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.928206764518188e-06, |
|
"loss": 0.8402, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.925015954052329e-06, |
|
"loss": 0.7851, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9218251435864715e-06, |
|
"loss": 0.501, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.918634333120613e-06, |
|
"loss": 0.6039, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9154435226547544e-06, |
|
"loss": 0.64, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.912252712188896e-06, |
|
"loss": 0.5726, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.909061901723038e-06, |
|
"loss": 0.6605, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.90587109125718e-06, |
|
"loss": 0.8105, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.902680280791321e-06, |
|
"loss": 0.8422, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.8994894703254635e-06, |
|
"loss": 0.5242, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.896298659859605e-06, |
|
"loss": 0.6062, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.8931078493937464e-06, |
|
"loss": 0.7289, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.889917038927888e-06, |
|
"loss": 0.6916, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.88672622846203e-06, |
|
"loss": 0.8526, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.883535417996172e-06, |
|
"loss": 1.0668, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.880344607530313e-06, |
|
"loss": 0.6912, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.877153797064455e-06, |
|
"loss": 0.7383, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.873962986598597e-06, |
|
"loss": 0.77, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.870772176132738e-06, |
|
"loss": 0.8328, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.86758136566688e-06, |
|
"loss": 0.7135, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.864390555201021e-06, |
|
"loss": 0.7976, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.861199744735164e-06, |
|
"loss": 0.5799, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.858008934269305e-06, |
|
"loss": 0.5246, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.854818123803447e-06, |
|
"loss": 0.5895, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.851627313337588e-06, |
|
"loss": 0.7751, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.84843650287173e-06, |
|
"loss": 0.7469, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.845245692405872e-06, |
|
"loss": 0.5013, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.842054881940013e-06, |
|
"loss": 0.5398, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.838864071474155e-06, |
|
"loss": 0.4547, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.835673261008296e-06, |
|
"loss": 0.8732, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.832482450542439e-06, |
|
"loss": 0.7671, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.82929164007658e-06, |
|
"loss": 0.6574, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8261008296107215e-06, |
|
"loss": 0.7173, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.822910019144863e-06, |
|
"loss": 0.4371, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.819719208679005e-06, |
|
"loss": 0.6992, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.816528398213147e-06, |
|
"loss": 0.6827, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.813337587747288e-06, |
|
"loss": 0.4919, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.81014677728143e-06, |
|
"loss": 0.9571, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.806955966815571e-06, |
|
"loss": 0.5202, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.8037651563497135e-06, |
|
"loss": 0.7919, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.800574345883855e-06, |
|
"loss": 0.5517, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7973835354179965e-06, |
|
"loss": 0.3889, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.794192724952138e-06, |
|
"loss": 0.5933, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.79100191448628e-06, |
|
"loss": 0.9298, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.787811104020422e-06, |
|
"loss": 0.4758, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.784620293554563e-06, |
|
"loss": 0.5162, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.781429483088705e-06, |
|
"loss": 0.6675, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.778238672622846e-06, |
|
"loss": 0.8493, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7750478621569885e-06, |
|
"loss": 0.6583, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.77185705169113e-06, |
|
"loss": 0.4897, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.768666241225271e-06, |
|
"loss": 0.6633, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.765475430759413e-06, |
|
"loss": 0.782, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.762284620293555e-06, |
|
"loss": 0.815, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.759093809827697e-06, |
|
"loss": 0.4498, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.755902999361838e-06, |
|
"loss": 0.6006, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.75271218889598e-06, |
|
"loss": 0.9473, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.749521378430121e-06, |
|
"loss": 0.4036, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.746330567964263e-06, |
|
"loss": 0.555, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.743139757498405e-06, |
|
"loss": 0.7843, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.739948947032546e-06, |
|
"loss": 0.8376, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.736758136566688e-06, |
|
"loss": 0.5423, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.73356732610083e-06, |
|
"loss": 0.5533, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.7303765156349716e-06, |
|
"loss": 0.5212, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.727185705169113e-06, |
|
"loss": 0.8054, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.7239948947032545e-06, |
|
"loss": 0.438, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.720804084237397e-06, |
|
"loss": 0.6025, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.717613273771538e-06, |
|
"loss": 0.8118, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.71442246330568e-06, |
|
"loss": 0.6911, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.711231652839821e-06, |
|
"loss": 0.7022, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.7080408423739636e-06, |
|
"loss": 0.5918, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.704850031908105e-06, |
|
"loss": 0.6012, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.7016592214422465e-06, |
|
"loss": 0.8031, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.698468410976389e-06, |
|
"loss": 0.7864, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.69527760051053e-06, |
|
"loss": 0.6361, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.692086790044672e-06, |
|
"loss": 0.6619, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.688895979578813e-06, |
|
"loss": 0.5132, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.6857051691129555e-06, |
|
"loss": 0.6111, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.682514358647097e-06, |
|
"loss": 0.7884, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.6793235481812385e-06, |
|
"loss": 0.4355, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.67613273771538e-06, |
|
"loss": 0.7325, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.672941927249522e-06, |
|
"loss": 0.5633, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.669751116783664e-06, |
|
"loss": 0.6415, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.666560306317805e-06, |
|
"loss": 0.6508, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.663369495851947e-06, |
|
"loss": 0.5909, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.660178685386089e-06, |
|
"loss": 0.5651, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.6569878749202305e-06, |
|
"loss": 0.6729, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.653797064454372e-06, |
|
"loss": 0.842, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.650606253988513e-06, |
|
"loss": 0.5844, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.647415443522656e-06, |
|
"loss": 0.7394, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.644224633056797e-06, |
|
"loss": 0.6725, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.641033822590939e-06, |
|
"loss": 0.6416, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.63784301212508e-06, |
|
"loss": 0.7926, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.634652201659222e-06, |
|
"loss": 0.5941, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.631461391193364e-06, |
|
"loss": 0.9582, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.628270580727505e-06, |
|
"loss": 0.4289, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.625079770261647e-06, |
|
"loss": 0.6518, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.621888959795788e-06, |
|
"loss": 0.8722, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.618698149329931e-06, |
|
"loss": 0.5419, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.615507338864072e-06, |
|
"loss": 0.6891, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.612316528398214e-06, |
|
"loss": 0.5157, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.609125717932355e-06, |
|
"loss": 0.7015, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.6059349074664965e-06, |
|
"loss": 0.546, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.602744097000639e-06, |
|
"loss": 0.6735, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.59955328653478e-06, |
|
"loss": 0.5564, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.596362476068922e-06, |
|
"loss": 0.5182, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.593171665603063e-06, |
|
"loss": 0.4053, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.5899808551372056e-06, |
|
"loss": 0.4039, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.586790044671347e-06, |
|
"loss": 0.6502, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.5835992342054885e-06, |
|
"loss": 0.8062, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.58040842373963e-06, |
|
"loss": 0.4143, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.5772176132737715e-06, |
|
"loss": 0.5539, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.574026802807914e-06, |
|
"loss": 0.5926, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.570835992342055e-06, |
|
"loss": 0.751, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.567645181876197e-06, |
|
"loss": 0.5886, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.564454371410338e-06, |
|
"loss": 0.677, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.5612635609444805e-06, |
|
"loss": 0.7097, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.558072750478622e-06, |
|
"loss": 0.56, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.5548819400127634e-06, |
|
"loss": 0.4481, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.551691129546905e-06, |
|
"loss": 0.4959, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.548500319081046e-06, |
|
"loss": 0.8399, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.545309508615189e-06, |
|
"loss": 0.6904, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.54211869814933e-06, |
|
"loss": 0.8689, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.538927887683472e-06, |
|
"loss": 0.6232, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.535737077217613e-06, |
|
"loss": 0.6428, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.5325462667517554e-06, |
|
"loss": 0.7462, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.529355456285897e-06, |
|
"loss": 0.529, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.526164645820038e-06, |
|
"loss": 0.4875, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.52297383535418e-06, |
|
"loss": 0.6747, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.519783024888322e-06, |
|
"loss": 0.7061, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.516592214422464e-06, |
|
"loss": 0.7865, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.513401403956605e-06, |
|
"loss": 0.5122, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.5102105934907466e-06, |
|
"loss": 0.4014, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.507019783024889e-06, |
|
"loss": 0.7509, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.50382897255903e-06, |
|
"loss": 0.8073, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.500638162093172e-06, |
|
"loss": 0.3459, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.497447351627314e-06, |
|
"loss": 0.6814, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.494256541161456e-06, |
|
"loss": 1.1027, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.491065730695597e-06, |
|
"loss": 0.5254, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.4878749202297385e-06, |
|
"loss": 0.7436, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.484684109763881e-06, |
|
"loss": 0.4877, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.481493299298022e-06, |
|
"loss": 0.657, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.478302488832164e-06, |
|
"loss": 0.7193, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.475111678366305e-06, |
|
"loss": 0.5461, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.471920867900448e-06, |
|
"loss": 0.5707, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.468730057434589e-06, |
|
"loss": 0.9755, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.4655392469687305e-06, |
|
"loss": 0.551, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.462348436502872e-06, |
|
"loss": 0.499, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.459157626037014e-06, |
|
"loss": 0.4268, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.455966815571156e-06, |
|
"loss": 0.6658, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.452776005105297e-06, |
|
"loss": 0.5642, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.449585194639439e-06, |
|
"loss": 0.6943, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.446394384173581e-06, |
|
"loss": 0.5404, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.4432035737077225e-06, |
|
"loss": 0.7934, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.440012763241864e-06, |
|
"loss": 0.7138, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.4368219527760055e-06, |
|
"loss": 0.5249, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.433631142310147e-06, |
|
"loss": 0.9614, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.430440331844289e-06, |
|
"loss": 0.5915, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.427249521378431e-06, |
|
"loss": 0.6766, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.424058710912572e-06, |
|
"loss": 0.6641, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.420867900446714e-06, |
|
"loss": 0.4849, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.417677089980856e-06, |
|
"loss": 0.7182, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.4144862795149974e-06, |
|
"loss": 0.6782, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.411295469049139e-06, |
|
"loss": 0.4837, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.40810465858328e-06, |
|
"loss": 0.7323, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.404913848117422e-06, |
|
"loss": 0.5807, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.401723037651564e-06, |
|
"loss": 0.373, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.398532227185706e-06, |
|
"loss": 0.5072, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.395341416719847e-06, |
|
"loss": 0.5952, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.392150606253989e-06, |
|
"loss": 0.549, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.388959795788131e-06, |
|
"loss": 0.5918, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.385768985322272e-06, |
|
"loss": 0.4411, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.382578174856414e-06, |
|
"loss": 0.7001, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.379387364390555e-06, |
|
"loss": 0.744, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.376196553924697e-06, |
|
"loss": 0.4091, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.373005743458839e-06, |
|
"loss": 0.7464, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.3698149329929806e-06, |
|
"loss": 0.6164, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.366624122527122e-06, |
|
"loss": 0.6213, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.3634333120612635e-06, |
|
"loss": 0.6991, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.360242501595406e-06, |
|
"loss": 0.5268, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.357051691129547e-06, |
|
"loss": 0.7768, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.353860880663689e-06, |
|
"loss": 0.9204, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.35067007019783e-06, |
|
"loss": 0.5844, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.347479259731972e-06, |
|
"loss": 0.5198, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.344288449266114e-06, |
|
"loss": 0.3069, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.3410976388002555e-06, |
|
"loss": 0.5465, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.337906828334397e-06, |
|
"loss": 0.4729, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.3347160178685384e-06, |
|
"loss": 0.6514, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.331525207402681e-06, |
|
"loss": 0.8142, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.328334396936822e-06, |
|
"loss": 0.6477, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.325143586470964e-06, |
|
"loss": 0.4601, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.321952776005105e-06, |
|
"loss": 0.6687, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.3187619655392475e-06, |
|
"loss": 0.4565, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.315571155073389e-06, |
|
"loss": 0.646, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.31238034460753e-06, |
|
"loss": 0.6145, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.309189534141672e-06, |
|
"loss": 0.3854, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.305998723675814e-06, |
|
"loss": 0.6016, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.302807913209956e-06, |
|
"loss": 0.5223, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.299617102744097e-06, |
|
"loss": 0.6356, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.2964262922782395e-06, |
|
"loss": 0.4599, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.293235481812381e-06, |
|
"loss": 0.6452, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.290044671346522e-06, |
|
"loss": 0.386, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.286853860880664e-06, |
|
"loss": 0.6384, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.283663050414806e-06, |
|
"loss": 0.7654, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.280472239948948e-06, |
|
"loss": 0.6019, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.277281429483089e-06, |
|
"loss": 0.6078, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.274090619017231e-06, |
|
"loss": 0.5181, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.270899808551373e-06, |
|
"loss": 0.6731, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.267708998085514e-06, |
|
"loss": 0.4956, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.264518187619656e-06, |
|
"loss": 0.6115, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.261327377153797e-06, |
|
"loss": 0.7712, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.25813656668794e-06, |
|
"loss": 0.5086, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.254945756222081e-06, |
|
"loss": 0.7241, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.251754945756223e-06, |
|
"loss": 0.5275, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.248564135290364e-06, |
|
"loss": 0.7552, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.245373324824506e-06, |
|
"loss": 0.4292, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.242182514358648e-06, |
|
"loss": 0.7575, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.238991703892789e-06, |
|
"loss": 0.5653, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.235800893426931e-06, |
|
"loss": 0.6882, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.232610082961072e-06, |
|
"loss": 0.6488, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.2294192724952146e-06, |
|
"loss": 0.5522, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.226228462029356e-06, |
|
"loss": 0.578, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.2230376515634975e-06, |
|
"loss": 0.7412, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.219846841097639e-06, |
|
"loss": 0.5138, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.216656030631781e-06, |
|
"loss": 0.6943, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.213465220165923e-06, |
|
"loss": 0.4599, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.210274409700064e-06, |
|
"loss": 0.8815, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.207083599234206e-06, |
|
"loss": 0.6245, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.203892788768347e-06, |
|
"loss": 0.5513, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.2007019783024895e-06, |
|
"loss": 0.4635, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.197511167836631e-06, |
|
"loss": 0.5711, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.1943203573707724e-06, |
|
"loss": 0.5078, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.191129546904914e-06, |
|
"loss": 0.4304, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.187938736439056e-06, |
|
"loss": 0.715, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.184747925973198e-06, |
|
"loss": 0.6305, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.181557115507339e-06, |
|
"loss": 0.6243, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.178366305041481e-06, |
|
"loss": 0.6439, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.175175494575622e-06, |
|
"loss": 0.4782, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.171984684109764e-06, |
|
"loss": 0.4523, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.168793873643906e-06, |
|
"loss": 0.4884, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.165603063178047e-06, |
|
"loss": 0.3461, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.162412252712189e-06, |
|
"loss": 0.2459, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.159221442246331e-06, |
|
"loss": 0.8138, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.156030631780473e-06, |
|
"loss": 0.6026, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.152839821314614e-06, |
|
"loss": 0.5463, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.1496490108487556e-06, |
|
"loss": 0.4317, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.146458200382897e-06, |
|
"loss": 0.6244, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.143267389917039e-06, |
|
"loss": 0.554, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.140076579451181e-06, |
|
"loss": 0.6441, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.136885768985322e-06, |
|
"loss": 0.6233, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.133694958519464e-06, |
|
"loss": 0.5561, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.130504148053606e-06, |
|
"loss": 0.7524, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.1273133375877475e-06, |
|
"loss": 0.4338, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.124122527121889e-06, |
|
"loss": 0.4495, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.1209317166560305e-06, |
|
"loss": 0.5139, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.117740906190173e-06, |
|
"loss": 0.6545, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.114550095724314e-06, |
|
"loss": 0.5588, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.111359285258456e-06, |
|
"loss": 0.609, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.108168474792597e-06, |
|
"loss": 0.553, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.1049776643267395e-06, |
|
"loss": 0.5844, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.101786853860881e-06, |
|
"loss": 0.5779, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.0985960433950225e-06, |
|
"loss": 0.4207, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.095405232929165e-06, |
|
"loss": 0.4617, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.092214422463306e-06, |
|
"loss": 0.6092, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.089023611997448e-06, |
|
"loss": 0.4607, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.085832801531589e-06, |
|
"loss": 0.4239, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.0826419910657315e-06, |
|
"loss": 0.5438, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.079451180599873e-06, |
|
"loss": 0.5006, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.0762603701340144e-06, |
|
"loss": 0.6889, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.073069559668156e-06, |
|
"loss": 0.5742, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.069878749202298e-06, |
|
"loss": 0.8366, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.06668793873644e-06, |
|
"loss": 0.5182, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.063497128270581e-06, |
|
"loss": 0.4807, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.060306317804723e-06, |
|
"loss": 0.3995, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.057115507338865e-06, |
|
"loss": 0.5958, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.0539246968730064e-06, |
|
"loss": 0.4855, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.050733886407148e-06, |
|
"loss": 0.5908, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.047543075941289e-06, |
|
"loss": 0.7867, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.044352265475432e-06, |
|
"loss": 0.7617, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.041161455009573e-06, |
|
"loss": 0.4752, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.037970644543715e-06, |
|
"loss": 0.4732, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.034779834077856e-06, |
|
"loss": 0.635, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.0315890236119976e-06, |
|
"loss": 0.4924, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.02839821314614e-06, |
|
"loss": 0.4416, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.025207402680281e-06, |
|
"loss": 0.4448, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.022016592214423e-06, |
|
"loss": 0.7631, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.018825781748564e-06, |
|
"loss": 0.5035, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.015634971282707e-06, |
|
"loss": 0.3779, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.012444160816848e-06, |
|
"loss": 0.4924, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.0092533503509896e-06, |
|
"loss": 0.3932, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.006062539885131e-06, |
|
"loss": 0.6974, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.0028717294192725e-06, |
|
"loss": 0.7347, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.999680918953415e-06, |
|
"loss": 0.5564, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.996490108487556e-06, |
|
"loss": 0.4424, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.993299298021698e-06, |
|
"loss": 0.5323, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.990108487555839e-06, |
|
"loss": 0.6138, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.9869176770899815e-06, |
|
"loss": 0.5156, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.983726866624123e-06, |
|
"loss": 0.282, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.9805360561582645e-06, |
|
"loss": 0.5392, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.977345245692406e-06, |
|
"loss": 0.5721, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.974154435226547e-06, |
|
"loss": 0.6967, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.97096362476069e-06, |
|
"loss": 0.5348, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.967772814294831e-06, |
|
"loss": 0.6884, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.964582003828973e-06, |
|
"loss": 0.5065, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.961391193363114e-06, |
|
"loss": 0.4505, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.9582003828972565e-06, |
|
"loss": 0.6881, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.955009572431398e-06, |
|
"loss": 0.5952, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.951818761965539e-06, |
|
"loss": 0.5656, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.948627951499681e-06, |
|
"loss": 0.6437, |
|
"step": 1544 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.945437141033822e-06, |
|
"loss": 0.5179, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.942246330567965e-06, |
|
"loss": 0.5278, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.939055520102106e-06, |
|
"loss": 0.6951, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.935864709636248e-06, |
|
"loss": 0.5468, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.932673899170389e-06, |
|
"loss": 0.5132, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.929483088704531e-06, |
|
"loss": 0.6297, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.926292278238673e-06, |
|
"loss": 0.5472, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.923101467772814e-06, |
|
"loss": 0.6623, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.919910657306956e-06, |
|
"loss": 0.6216, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.916719846841098e-06, |
|
"loss": 0.5332, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.91352903637524e-06, |
|
"loss": 0.4792, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.910338225909381e-06, |
|
"loss": 0.4573, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.9071474154435225e-06, |
|
"loss": 0.5135, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.903956604977665e-06, |
|
"loss": 0.7619, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.900765794511806e-06, |
|
"loss": 0.6681, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.897574984045948e-06, |
|
"loss": 0.7789, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.89438417358009e-06, |
|
"loss": 0.6078, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.8911933631142316e-06, |
|
"loss": 0.4812, |
|
"step": 1616 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.888002552648373e-06, |
|
"loss": 0.5893, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.8848117421825145e-06, |
|
"loss": 0.4775, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.881620931716657e-06, |
|
"loss": 0.5012, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.878430121250798e-06, |
|
"loss": 0.4752, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.87523931078494e-06, |
|
"loss": 0.4365, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.872048500319081e-06, |
|
"loss": 0.6722, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.8688576898532236e-06, |
|
"loss": 0.6083, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.865666879387365e-06, |
|
"loss": 0.4533, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.8624760689215065e-06, |
|
"loss": 0.5879, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.859285258455648e-06, |
|
"loss": 0.6564, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.85609444798979e-06, |
|
"loss": 0.5475, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.852903637523932e-06, |
|
"loss": 0.5018, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.849712827058073e-06, |
|
"loss": 0.4544, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.846522016592215e-06, |
|
"loss": 0.6603, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.843331206126357e-06, |
|
"loss": 0.6887, |
|
"step": 1676 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.8401403956604985e-06, |
|
"loss": 0.7819, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.83694958519464e-06, |
|
"loss": 0.5052, |
|
"step": 1684 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.833758774728781e-06, |
|
"loss": 0.6689, |
|
"step": 1688 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.830567964262923e-06, |
|
"loss": 0.5564, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.827377153797065e-06, |
|
"loss": 0.3658, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.824186343331207e-06, |
|
"loss": 0.6376, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.820995532865348e-06, |
|
"loss": 0.5681, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.81780472239949e-06, |
|
"loss": 0.5974, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.814613911933632e-06, |
|
"loss": 0.5623, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.8114231014677734e-06, |
|
"loss": 0.6437, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.808232291001915e-06, |
|
"loss": 0.6442, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.8050414805360563e-06, |
|
"loss": 0.4729, |
|
"step": 1724 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.801850670070198e-06, |
|
"loss": 0.3677, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.79865985960434e-06, |
|
"loss": 0.4295, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.7954690491384816e-06, |
|
"loss": 0.6049, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.792278238672623e-06, |
|
"loss": 0.6363, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.7890874282067645e-06, |
|
"loss": 0.5939, |
|
"step": 1744 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.785896617740907e-06, |
|
"loss": 0.5011, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.7827058072750483e-06, |
|
"loss": 0.5177, |
|
"step": 1752 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.77951499680919e-06, |
|
"loss": 0.7722, |
|
"step": 1756 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.7763241863433313e-06, |
|
"loss": 0.5204, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.773133375877473e-06, |
|
"loss": 0.455, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.769942565411615e-06, |
|
"loss": 0.5397, |
|
"step": 1768 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.7667517549457565e-06, |
|
"loss": 0.5528, |
|
"step": 1772 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.763560944479898e-06, |
|
"loss": 0.5286, |
|
"step": 1776 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.76037013401404e-06, |
|
"loss": 0.5475, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.7571793235481818e-06, |
|
"loss": 0.3887, |
|
"step": 1784 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.7539885130823233e-06, |
|
"loss": 0.6288, |
|
"step": 1788 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.7507977026164647e-06, |
|
"loss": 0.5563, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.7476068921506066e-06, |
|
"loss": 0.6103, |
|
"step": 1796 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.744416081684748e-06, |
|
"loss": 0.4141, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.74122527121889e-06, |
|
"loss": 0.4075, |
|
"step": 1804 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.738034460753032e-06, |
|
"loss": 0.3594, |
|
"step": 1808 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.7348436502871733e-06, |
|
"loss": 0.5157, |
|
"step": 1812 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.731652839821315e-06, |
|
"loss": 0.4918, |
|
"step": 1816 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.7284620293554563e-06, |
|
"loss": 0.4456, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.7252712188895986e-06, |
|
"loss": 0.7768, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.72208040842374e-06, |
|
"loss": 0.7511, |
|
"step": 1828 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.7188895979578815e-06, |
|
"loss": 0.4604, |
|
"step": 1832 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.715698787492023e-06, |
|
"loss": 0.6048, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.7125079770261653e-06, |
|
"loss": 0.6261, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.709317166560307e-06, |
|
"loss": 0.7588, |
|
"step": 1844 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.7061263560944483e-06, |
|
"loss": 0.6608, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.7029355456285897e-06, |
|
"loss": 0.5453, |
|
"step": 1852 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.6997447351627312e-06, |
|
"loss": 0.4361, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.6965539246968735e-06, |
|
"loss": 0.5557, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.693363114231015e-06, |
|
"loss": 0.6371, |
|
"step": 1864 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.6901723037651565e-06, |
|
"loss": 0.4953, |
|
"step": 1868 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.686981493299298e-06, |
|
"loss": 0.4157, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.6837906828334403e-06, |
|
"loss": 0.5469, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.6805998723675817e-06, |
|
"loss": 0.4933, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.677409061901723e-06, |
|
"loss": 0.4994, |
|
"step": 1884 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.6742182514358647e-06, |
|
"loss": 0.3726, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.6710274409700066e-06, |
|
"loss": 0.5413, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.6678366305041485e-06, |
|
"loss": 0.574, |
|
"step": 1896 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.66464582003829e-06, |
|
"loss": 0.2569, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.6614550095724314e-06, |
|
"loss": 0.5012, |
|
"step": 1904 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.6582641991065733e-06, |
|
"loss": 0.586, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.655073388640715e-06, |
|
"loss": 0.4588, |
|
"step": 1912 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.6518825781748567e-06, |
|
"loss": 0.3745, |
|
"step": 1916 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.6486917677089985e-06, |
|
"loss": 0.5444, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.64550095724314e-06, |
|
"loss": 0.5545, |
|
"step": 1924 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.6423101467772815e-06, |
|
"loss": 0.6965, |
|
"step": 1928 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.6391193363114234e-06, |
|
"loss": 0.4442, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.6359285258455653e-06, |
|
"loss": 0.4866, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.6327377153797067e-06, |
|
"loss": 0.5114, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.6295469049138482e-06, |
|
"loss": 0.5922, |
|
"step": 1944 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.62635609444799e-06, |
|
"loss": 0.4787, |
|
"step": 1948 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.623165283982132e-06, |
|
"loss": 0.6709, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.6199744735162735e-06, |
|
"loss": 0.5078, |
|
"step": 1956 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.616783663050415e-06, |
|
"loss": 0.5999, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.6135928525845564e-06, |
|
"loss": 0.5051, |
|
"step": 1964 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.6111997447351634e-06, |
|
"loss": 0.4373, |
|
"step": 1968 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.608008934269305e-06, |
|
"loss": 0.7497, |
|
"step": 1972 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.6048181238034463e-06, |
|
"loss": 0.458, |
|
"step": 1976 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.6016273133375878e-06, |
|
"loss": 0.3981, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.59843650287173e-06, |
|
"loss": 0.4995, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.5952456924058716e-06, |
|
"loss": 0.493, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.592054881940013e-06, |
|
"loss": 0.462, |
|
"step": 1992 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.5888640714741545e-06, |
|
"loss": 0.5239, |
|
"step": 1996 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.585673261008296e-06, |
|
"loss": 0.4376, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.5824824505424383e-06, |
|
"loss": 0.6067, |
|
"step": 2004 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.5792916400765798e-06, |
|
"loss": 0.4091, |
|
"step": 2008 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.5761008296107212e-06, |
|
"loss": 0.5261, |
|
"step": 2012 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.5729100191448627e-06, |
|
"loss": 0.5408, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.569719208679005e-06, |
|
"loss": 0.5867, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.5665283982131465e-06, |
|
"loss": 0.636, |
|
"step": 2024 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.563337587747288e-06, |
|
"loss": 0.4329, |
|
"step": 2028 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.56014677728143e-06, |
|
"loss": 0.7026, |
|
"step": 2032 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.5569559668155713e-06, |
|
"loss": 0.5245, |
|
"step": 2036 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.553765156349713e-06, |
|
"loss": 0.4929, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.5505743458838547e-06, |
|
"loss": 0.4876, |
|
"step": 2044 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.5473835354179966e-06, |
|
"loss": 0.45, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.544192724952138e-06, |
|
"loss": 0.5068, |
|
"step": 2052 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.54100191448628e-06, |
|
"loss": 0.5647, |
|
"step": 2056 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.5378111040204214e-06, |
|
"loss": 0.5048, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.5346202935545633e-06, |
|
"loss": 0.457, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.5314294830887048e-06, |
|
"loss": 0.4089, |
|
"step": 2068 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.5282386726228462e-06, |
|
"loss": 0.3521, |
|
"step": 2072 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.5250478621569886e-06, |
|
"loss": 0.3477, |
|
"step": 2076 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.52185705169113e-06, |
|
"loss": 0.6625, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.5186662412252715e-06, |
|
"loss": 0.3829, |
|
"step": 2084 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.515475430759413e-06, |
|
"loss": 0.4733, |
|
"step": 2088 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.5122846202935553e-06, |
|
"loss": 0.4024, |
|
"step": 2092 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.5090938098276968e-06, |
|
"loss": 0.5733, |
|
"step": 2096 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.5059029993618382e-06, |
|
"loss": 0.5788, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.5027121888959797e-06, |
|
"loss": 0.4806, |
|
"step": 2104 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.499521378430121e-06, |
|
"loss": 0.5091, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.4963305679642635e-06, |
|
"loss": 0.6465, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.493139757498405e-06, |
|
"loss": 0.4821, |
|
"step": 2116 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.4899489470325464e-06, |
|
"loss": 0.3563, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.486758136566688e-06, |
|
"loss": 0.7174, |
|
"step": 2124 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.4835673261008302e-06, |
|
"loss": 0.3833, |
|
"step": 2128 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.4803765156349717e-06, |
|
"loss": 0.6688, |
|
"step": 2132 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.477185705169113e-06, |
|
"loss": 0.5733, |
|
"step": 2136 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.4739948947032546e-06, |
|
"loss": 0.5743, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.4708040842373965e-06, |
|
"loss": 0.5219, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.4676132737715384e-06, |
|
"loss": 0.5964, |
|
"step": 2148 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.46442246330568e-06, |
|
"loss": 0.5981, |
|
"step": 2152 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.4612316528398214e-06, |
|
"loss": 0.4068, |
|
"step": 2156 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.4580408423739632e-06, |
|
"loss": 0.3966, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.454850031908105e-06, |
|
"loss": 0.2291, |
|
"step": 2164 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.4516592214422466e-06, |
|
"loss": 0.4695, |
|
"step": 2168 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.4484684109763885e-06, |
|
"loss": 0.5594, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.44527760051053e-06, |
|
"loss": 0.5603, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.4420867900446714e-06, |
|
"loss": 0.4934, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.4388959795788133e-06, |
|
"loss": 0.6316, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.4357051691129552e-06, |
|
"loss": 0.3424, |
|
"step": 2188 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.4325143586470967e-06, |
|
"loss": 0.566, |
|
"step": 2192 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.429323548181238e-06, |
|
"loss": 0.3565, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.42613273771538e-06, |
|
"loss": 0.5191, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.422941927249522e-06, |
|
"loss": 0.3848, |
|
"step": 2204 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.4197511167836634e-06, |
|
"loss": 0.6962, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.416560306317805e-06, |
|
"loss": 0.3646, |
|
"step": 2212 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.4133694958519464e-06, |
|
"loss": 0.3756, |
|
"step": 2216 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.4101786853860887e-06, |
|
"loss": 0.2853, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.40698787492023e-06, |
|
"loss": 0.2925, |
|
"step": 2224 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.4037970644543716e-06, |
|
"loss": 0.3838, |
|
"step": 2228 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.400606253988513e-06, |
|
"loss": 0.4479, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.3974154435226554e-06, |
|
"loss": 0.5207, |
|
"step": 2236 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.394224633056797e-06, |
|
"loss": 0.3813, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.3910338225909384e-06, |
|
"loss": 0.4028, |
|
"step": 2244 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.38784301212508e-06, |
|
"loss": 0.3406, |
|
"step": 2248 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.3846522016592213e-06, |
|
"loss": 0.4495, |
|
"step": 2252 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.3814613911933636e-06, |
|
"loss": 0.5411, |
|
"step": 2256 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.378270580727505e-06, |
|
"loss": 0.3533, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.3750797702616465e-06, |
|
"loss": 0.5577, |
|
"step": 2264 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.371888959795788e-06, |
|
"loss": 0.4198, |
|
"step": 2268 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.3686981493299303e-06, |
|
"loss": 0.2956, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.365507338864072e-06, |
|
"loss": 0.5714, |
|
"step": 2276 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.3623165283982133e-06, |
|
"loss": 0.3393, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.359125717932355e-06, |
|
"loss": 0.3448, |
|
"step": 2284 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.3559349074664966e-06, |
|
"loss": 0.4956, |
|
"step": 2288 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.3527440970006385e-06, |
|
"loss": 0.4609, |
|
"step": 2292 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.34955328653478e-06, |
|
"loss": 0.4499, |
|
"step": 2296 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.346362476068922e-06, |
|
"loss": 0.3638, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.3431716656030634e-06, |
|
"loss": 0.6062, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.3399808551372053e-06, |
|
"loss": 0.319, |
|
"step": 2308 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.3367900446713467e-06, |
|
"loss": 0.3106, |
|
"step": 2312 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.3335992342054886e-06, |
|
"loss": 0.6715, |
|
"step": 2316 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.33040842373963e-06, |
|
"loss": 0.4007, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.3272176132737716e-06, |
|
"loss": 0.5854, |
|
"step": 2324 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.324026802807914e-06, |
|
"loss": 0.4384, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.3208359923420554e-06, |
|
"loss": 0.5186, |
|
"step": 2332 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.317645181876197e-06, |
|
"loss": 0.2793, |
|
"step": 2336 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.3144543714103383e-06, |
|
"loss": 0.3945, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.3112635609444806e-06, |
|
"loss": 0.4488, |
|
"step": 2344 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.308072750478622e-06, |
|
"loss": 0.2692, |
|
"step": 2348 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.3048819400127635e-06, |
|
"loss": 0.4689, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.301691129546905e-06, |
|
"loss": 0.2162, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.2985003190810465e-06, |
|
"loss": 0.3339, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.295309508615189e-06, |
|
"loss": 0.5855, |
|
"step": 2364 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.2921186981493303e-06, |
|
"loss": 0.4823, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.2889278876834717e-06, |
|
"loss": 0.3587, |
|
"step": 2372 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.2857370772176132e-06, |
|
"loss": 0.3903, |
|
"step": 2376 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.2825462667517555e-06, |
|
"loss": 0.5669, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.279355456285897e-06, |
|
"loss": 0.413, |
|
"step": 2384 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.2761646458200385e-06, |
|
"loss": 0.3735, |
|
"step": 2388 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.27297383535418e-06, |
|
"loss": 0.5467, |
|
"step": 2392 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.269783024888322e-06, |
|
"loss": 0.3738, |
|
"step": 2396 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.2665922144224637e-06, |
|
"loss": 0.4619, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.263401403956605e-06, |
|
"loss": 0.3739, |
|
"step": 2404 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.2602105934907467e-06, |
|
"loss": 0.4065, |
|
"step": 2408 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.2570197830248886e-06, |
|
"loss": 0.3406, |
|
"step": 2412 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.2538289725590305e-06, |
|
"loss": 0.4554, |
|
"step": 2416 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.250638162093172e-06, |
|
"loss": 0.799, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.247447351627314e-06, |
|
"loss": 0.4552, |
|
"step": 2424 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.2442565411614553e-06, |
|
"loss": 0.3708, |
|
"step": 2428 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.2410657306955968e-06, |
|
"loss": 0.3309, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.2378749202297387e-06, |
|
"loss": 0.4065, |
|
"step": 2436 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.2346841097638806e-06, |
|
"loss": 0.627, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.231493299298022e-06, |
|
"loss": 0.3551, |
|
"step": 2444 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.2283024888321635e-06, |
|
"loss": 0.2465, |
|
"step": 2448 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.2251116783663054e-06, |
|
"loss": 0.5104, |
|
"step": 2452 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.2219208679004473e-06, |
|
"loss": 0.5923, |
|
"step": 2456 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.2187300574345887e-06, |
|
"loss": 0.3455, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.2155392469687302e-06, |
|
"loss": 0.3767, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.2123484365028717e-06, |
|
"loss": 0.4728, |
|
"step": 2468 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.209157626037014e-06, |
|
"loss": 0.4427, |
|
"step": 2472 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.2059668155711555e-06, |
|
"loss": 0.3805, |
|
"step": 2476 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.202776005105297e-06, |
|
"loss": 0.2417, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.1995851946394384e-06, |
|
"loss": 0.4459, |
|
"step": 2484 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.1963943841735807e-06, |
|
"loss": 0.5951, |
|
"step": 2488 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.193203573707722e-06, |
|
"loss": 0.4512, |
|
"step": 2492 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.1900127632418637e-06, |
|
"loss": 0.4038, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.186821952776005e-06, |
|
"loss": 0.5716, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.1836311423101466e-06, |
|
"loss": 0.3122, |
|
"step": 2504 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.180440331844289e-06, |
|
"loss": 0.6523, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.1772495213784304e-06, |
|
"loss": 0.4129, |
|
"step": 2512 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.174058710912572e-06, |
|
"loss": 0.7674, |
|
"step": 2516 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.1708679004467138e-06, |
|
"loss": 0.5227, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.1676770899808557e-06, |
|
"loss": 0.457, |
|
"step": 2524 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.164486279514997e-06, |
|
"loss": 0.3279, |
|
"step": 2528 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.1612954690491386e-06, |
|
"loss": 0.4809, |
|
"step": 2532 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.1581046585832805e-06, |
|
"loss": 0.3513, |
|
"step": 2536 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.154913848117422e-06, |
|
"loss": 0.5097, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.151723037651564e-06, |
|
"loss": 0.4727, |
|
"step": 2544 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.1485322271857053e-06, |
|
"loss": 0.3848, |
|
"step": 2548 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.1453414167198472e-06, |
|
"loss": 0.49, |
|
"step": 2552 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.1421506062539887e-06, |
|
"loss": 0.4166, |
|
"step": 2556 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.1389597957881306e-06, |
|
"loss": 0.4397, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.135768985322272e-06, |
|
"loss": 0.4295, |
|
"step": 2564 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.132578174856414e-06, |
|
"loss": 0.3807, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.1293873643905554e-06, |
|
"loss": 0.5155, |
|
"step": 2572 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.126196553924697e-06, |
|
"loss": 0.4183, |
|
"step": 2576 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.123005743458839e-06, |
|
"loss": 0.4173, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.1198149329929807e-06, |
|
"loss": 0.5842, |
|
"step": 2584 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.116624122527122e-06, |
|
"loss": 0.6673, |
|
"step": 2588 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.1134333120612636e-06, |
|
"loss": 0.4492, |
|
"step": 2592 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.110242501595406e-06, |
|
"loss": 0.4747, |
|
"step": 2596 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.1070516911295474e-06, |
|
"loss": 0.5746, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.103860880663689e-06, |
|
"loss": 0.5708, |
|
"step": 2604 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.1006700701978303e-06, |
|
"loss": 0.601, |
|
"step": 2608 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.097479259731972e-06, |
|
"loss": 0.7205, |
|
"step": 2612 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.094288449266114e-06, |
|
"loss": 0.4023, |
|
"step": 2616 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.0910976388002556e-06, |
|
"loss": 0.5012, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.087906828334397e-06, |
|
"loss": 0.4779, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.0847160178685385e-06, |
|
"loss": 0.5062, |
|
"step": 2628 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.081525207402681e-06, |
|
"loss": 0.2888, |
|
"step": 2632 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.0783343969368223e-06, |
|
"loss": 0.4121, |
|
"step": 2636 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.075143586470964e-06, |
|
"loss": 0.4721, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.0719527760051053e-06, |
|
"loss": 0.3422, |
|
"step": 2644 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.068761965539247e-06, |
|
"loss": 0.5018, |
|
"step": 2648 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.065571155073389e-06, |
|
"loss": 0.5165, |
|
"step": 2652 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.0623803446075305e-06, |
|
"loss": 0.3574, |
|
"step": 2656 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.059189534141672e-06, |
|
"loss": 0.579, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.055998723675814e-06, |
|
"loss": 0.4961, |
|
"step": 2664 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.0528079132099558e-06, |
|
"loss": 0.3949, |
|
"step": 2668 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.0496171027440973e-06, |
|
"loss": 0.3999, |
|
"step": 2672 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.046426292278239e-06, |
|
"loss": 0.524, |
|
"step": 2676 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.0432354818123806e-06, |
|
"loss": 0.3688, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.040044671346522e-06, |
|
"loss": 0.403, |
|
"step": 2684 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.036853860880664e-06, |
|
"loss": 0.3388, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.033663050414806e-06, |
|
"loss": 0.4706, |
|
"step": 2692 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.0304722399489473e-06, |
|
"loss": 0.6817, |
|
"step": 2696 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.027281429483089e-06, |
|
"loss": 0.3896, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.0240906190172307e-06, |
|
"loss": 0.358, |
|
"step": 2704 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.0208998085513726e-06, |
|
"loss": 0.3115, |
|
"step": 2708 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.017708998085514e-06, |
|
"loss": 0.5322, |
|
"step": 2712 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.0145181876196555e-06, |
|
"loss": 0.4613, |
|
"step": 2716 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.011327377153797e-06, |
|
"loss": 0.4374, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.0081365666879393e-06, |
|
"loss": 0.4775, |
|
"step": 2724 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.004945756222081e-06, |
|
"loss": 0.349, |
|
"step": 2728 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.0017549457562223e-06, |
|
"loss": 0.5114, |
|
"step": 2732 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.9985641352903637e-06, |
|
"loss": 0.3901, |
|
"step": 2736 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.995373324824506e-06, |
|
"loss": 0.4756, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.9921825143586475e-06, |
|
"loss": 0.4669, |
|
"step": 2744 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.988991703892789e-06, |
|
"loss": 0.5554, |
|
"step": 2748 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.9858008934269305e-06, |
|
"loss": 0.3345, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.982610082961072e-06, |
|
"loss": 0.3653, |
|
"step": 2756 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.9794192724952143e-06, |
|
"loss": 0.4543, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.9762284620293557e-06, |
|
"loss": 0.382, |
|
"step": 2764 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.973037651563497e-06, |
|
"loss": 0.2821, |
|
"step": 2768 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.969846841097639e-06, |
|
"loss": 0.4392, |
|
"step": 2772 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.966656030631781e-06, |
|
"loss": 0.3785, |
|
"step": 2776 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.9634652201659224e-06, |
|
"loss": 0.4799, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.960274409700064e-06, |
|
"loss": 0.4004, |
|
"step": 2784 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.957083599234206e-06, |
|
"loss": 0.4598, |
|
"step": 2788 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.9538927887683473e-06, |
|
"loss": 0.6889, |
|
"step": 2792 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.950701978302489e-06, |
|
"loss": 0.3401, |
|
"step": 2796 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.9475111678366306e-06, |
|
"loss": 0.5162, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.9443203573707725e-06, |
|
"loss": 0.3811, |
|
"step": 2804 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.941129546904914e-06, |
|
"loss": 0.3048, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.937938736439056e-06, |
|
"loss": 0.5528, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.9347479259731974e-06, |
|
"loss": 0.3721, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.9315571155073393e-06, |
|
"loss": 0.4877, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.9283663050414807e-06, |
|
"loss": 0.3101, |
|
"step": 2824 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.925175494575622e-06, |
|
"loss": 0.3458, |
|
"step": 2828 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.9219846841097645e-06, |
|
"loss": 0.3741, |
|
"step": 2832 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.918793873643906e-06, |
|
"loss": 0.7428, |
|
"step": 2836 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.9156030631780475e-06, |
|
"loss": 0.3487, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.912412252712189e-06, |
|
"loss": 0.3184, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.9092214422463313e-06, |
|
"loss": 0.3778, |
|
"step": 2848 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.9060306317804727e-06, |
|
"loss": 0.4085, |
|
"step": 2852 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.902839821314614e-06, |
|
"loss": 0.4859, |
|
"step": 2856 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.8996490108487557e-06, |
|
"loss": 0.5783, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.896458200382897e-06, |
|
"loss": 0.2396, |
|
"step": 2864 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.8932673899170395e-06, |
|
"loss": 0.6563, |
|
"step": 2868 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.890076579451181e-06, |
|
"loss": 0.3048, |
|
"step": 2872 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.8868857689853224e-06, |
|
"loss": 0.3935, |
|
"step": 2876 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.883694958519464e-06, |
|
"loss": 0.5063, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.880504148053606e-06, |
|
"loss": 0.5056, |
|
"step": 2884 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.8773133375877476e-06, |
|
"loss": 0.2203, |
|
"step": 2888 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.874122527121889e-06, |
|
"loss": 0.3044, |
|
"step": 2892 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.8709317166560306e-06, |
|
"loss": 0.4444, |
|
"step": 2896 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.8677409061901725e-06, |
|
"loss": 0.3645, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.8645500957243144e-06, |
|
"loss": 0.4594, |
|
"step": 2904 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.861359285258456e-06, |
|
"loss": 0.4897, |
|
"step": 2908 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.8581684747925973e-06, |
|
"loss": 0.4772, |
|
"step": 2912 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.854977664326739e-06, |
|
"loss": 0.388, |
|
"step": 2916 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.851786853860881e-06, |
|
"loss": 0.3869, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.8485960433950226e-06, |
|
"loss": 0.4853, |
|
"step": 2924 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.8454052329291645e-06, |
|
"loss": 0.4467, |
|
"step": 2928 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.842214422463306e-06, |
|
"loss": 0.2356, |
|
"step": 2932 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.8390236119974474e-06, |
|
"loss": 0.4614, |
|
"step": 2936 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.8358328015315893e-06, |
|
"loss": 0.3212, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.832641991065731e-06, |
|
"loss": 0.5037, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.8294511805998727e-06, |
|
"loss": 0.4957, |
|
"step": 2948 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.826260370134014e-06, |
|
"loss": 0.418, |
|
"step": 2952 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.823069559668156e-06, |
|
"loss": 0.2996, |
|
"step": 2956 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.819878749202298e-06, |
|
"loss": 0.5421, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.8166879387364394e-06, |
|
"loss": 0.5049, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.813497128270581e-06, |
|
"loss": 0.3929, |
|
"step": 2968 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.8103063178047223e-06, |
|
"loss": 0.4045, |
|
"step": 2972 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.8071155073388646e-06, |
|
"loss": 0.3494, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.803924696873006e-06, |
|
"loss": 0.3782, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.8007338864071476e-06, |
|
"loss": 0.2768, |
|
"step": 2984 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.797543075941289e-06, |
|
"loss": 0.531, |
|
"step": 2988 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.7943522654754314e-06, |
|
"loss": 0.4958, |
|
"step": 2992 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.791161455009573e-06, |
|
"loss": 0.6183, |
|
"step": 2996 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.7879706445437143e-06, |
|
"loss": 0.3521, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.7847798340778558e-06, |
|
"loss": 0.4406, |
|
"step": 3004 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.7815890236119973e-06, |
|
"loss": 0.4131, |
|
"step": 3008 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.7783982131461396e-06, |
|
"loss": 0.5107, |
|
"step": 3012 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.775207402680281e-06, |
|
"loss": 0.2735, |
|
"step": 3016 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.7720165922144225e-06, |
|
"loss": 0.3788, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.7696234843650286e-06, |
|
"loss": 0.5599, |
|
"step": 3024 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.7664326738991705e-06, |
|
"loss": 0.2355, |
|
"step": 3028 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.7632418634333124e-06, |
|
"loss": 0.5358, |
|
"step": 3032 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.760051052967454e-06, |
|
"loss": 0.3283, |
|
"step": 3036 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.7568602425015958e-06, |
|
"loss": 0.4093, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.7536694320357372e-06, |
|
"loss": 0.287, |
|
"step": 3044 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.750478621569879e-06, |
|
"loss": 0.5271, |
|
"step": 3048 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.7472878111040206e-06, |
|
"loss": 0.3372, |
|
"step": 3052 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.7440970006381625e-06, |
|
"loss": 0.5649, |
|
"step": 3056 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.740906190172304e-06, |
|
"loss": 0.5017, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.7377153797064454e-06, |
|
"loss": 0.6057, |
|
"step": 3064 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.7345245692405873e-06, |
|
"loss": 0.4184, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.7313337587747292e-06, |
|
"loss": 0.2892, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.7281429483088707e-06, |
|
"loss": 0.5914, |
|
"step": 3076 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.724952137843012e-06, |
|
"loss": 0.472, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.721761327377154e-06, |
|
"loss": 0.3773, |
|
"step": 3084 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.718570516911296e-06, |
|
"loss": 0.2942, |
|
"step": 3088 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.7153797064454374e-06, |
|
"loss": 0.3445, |
|
"step": 3092 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.712188895979579e-06, |
|
"loss": 0.2773, |
|
"step": 3096 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.7089980855137204e-06, |
|
"loss": 0.4007, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.7058072750478627e-06, |
|
"loss": 0.3083, |
|
"step": 3104 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.702616464582004e-06, |
|
"loss": 0.4782, |
|
"step": 3108 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.6994256541161456e-06, |
|
"loss": 0.5419, |
|
"step": 3112 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.696234843650287e-06, |
|
"loss": 0.5713, |
|
"step": 3116 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.6930440331844294e-06, |
|
"loss": 0.3722, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.689853222718571e-06, |
|
"loss": 0.4663, |
|
"step": 3124 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.6866624122527123e-06, |
|
"loss": 0.3208, |
|
"step": 3128 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.683471601786854e-06, |
|
"loss": 0.351, |
|
"step": 3132 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.6802807913209957e-06, |
|
"loss": 0.513, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.6770899808551376e-06, |
|
"loss": 0.4409, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.673899170389279e-06, |
|
"loss": 0.3335, |
|
"step": 3144 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.6707083599234205e-06, |
|
"loss": 0.3706, |
|
"step": 3148 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.6675175494575624e-06, |
|
"loss": 0.4404, |
|
"step": 3152 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.6643267389917043e-06, |
|
"loss": 0.4186, |
|
"step": 3156 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.661135928525846e-06, |
|
"loss": 0.3666, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.6579451180599873e-06, |
|
"loss": 0.3849, |
|
"step": 3164 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.654754307594129e-06, |
|
"loss": 0.4564, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.6515634971282706e-06, |
|
"loss": 0.3534, |
|
"step": 3172 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.6483726866624125e-06, |
|
"loss": 0.3735, |
|
"step": 3176 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.645181876196554e-06, |
|
"loss": 0.4449, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.641991065730696e-06, |
|
"loss": 0.5032, |
|
"step": 3184 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.6388002552648374e-06, |
|
"loss": 0.3677, |
|
"step": 3188 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.6356094447989793e-06, |
|
"loss": 0.5004, |
|
"step": 3192 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.632418634333121e-06, |
|
"loss": 0.1972, |
|
"step": 3196 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.6292278238672626e-06, |
|
"loss": 0.4606, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.626037013401404e-06, |
|
"loss": 0.3533, |
|
"step": 3204 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.6228462029355456e-06, |
|
"loss": 0.3607, |
|
"step": 3208 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.619655392469688e-06, |
|
"loss": 0.5767, |
|
"step": 3212 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.6164645820038293e-06, |
|
"loss": 0.5316, |
|
"step": 3216 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.613273771537971e-06, |
|
"loss": 0.2474, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.6100829610721123e-06, |
|
"loss": 0.3168, |
|
"step": 3224 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.6068921506062546e-06, |
|
"loss": 0.4029, |
|
"step": 3228 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.603701340140396e-06, |
|
"loss": 0.2693, |
|
"step": 3232 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.6005105296745375e-06, |
|
"loss": 0.3756, |
|
"step": 3236 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.597319719208679e-06, |
|
"loss": 0.3712, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.5941289087428205e-06, |
|
"loss": 0.366, |
|
"step": 3244 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.590938098276963e-06, |
|
"loss": 0.3813, |
|
"step": 3248 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.5877472878111043e-06, |
|
"loss": 0.4442, |
|
"step": 3252 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.5845564773452457e-06, |
|
"loss": 0.4061, |
|
"step": 3256 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.581365666879387e-06, |
|
"loss": 0.3679, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.5781748564135295e-06, |
|
"loss": 0.2641, |
|
"step": 3264 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.574984045947671e-06, |
|
"loss": 0.5656, |
|
"step": 3268 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.5717932354818125e-06, |
|
"loss": 0.3672, |
|
"step": 3272 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.568602425015954e-06, |
|
"loss": 0.3395, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.565411614550096e-06, |
|
"loss": 0.5946, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.5622208040842377e-06, |
|
"loss": 0.3526, |
|
"step": 3284 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.559029993618379e-06, |
|
"loss": 0.3365, |
|
"step": 3288 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.555839183152521e-06, |
|
"loss": 0.4003, |
|
"step": 3292 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.5526483726866626e-06, |
|
"loss": 0.3994, |
|
"step": 3296 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.5494575622208045e-06, |
|
"loss": 0.3623, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.546266751754946e-06, |
|
"loss": 0.5994, |
|
"step": 3304 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.543075941289088e-06, |
|
"loss": 0.3717, |
|
"step": 3308 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.5398851308232293e-06, |
|
"loss": 0.2424, |
|
"step": 3312 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.5366943203573708e-06, |
|
"loss": 0.5083, |
|
"step": 3316 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.5335035098915127e-06, |
|
"loss": 0.2865, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.5303126994256545e-06, |
|
"loss": 0.2184, |
|
"step": 3324 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.527121888959796e-06, |
|
"loss": 0.5697, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.5239310784939375e-06, |
|
"loss": 0.3524, |
|
"step": 3332 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.5207402680280794e-06, |
|
"loss": 0.3922, |
|
"step": 3336 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.5175494575622213e-06, |
|
"loss": 0.3364, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.5143586470963627e-06, |
|
"loss": 0.3983, |
|
"step": 3344 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.5111678366305042e-06, |
|
"loss": 0.3812, |
|
"step": 3348 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.5079770261646457e-06, |
|
"loss": 0.3001, |
|
"step": 3352 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.504786215698788e-06, |
|
"loss": 0.3159, |
|
"step": 3356 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.5015954052329295e-06, |
|
"loss": 0.316, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.498404594767071e-06, |
|
"loss": 0.2347, |
|
"step": 3364 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.495213784301213e-06, |
|
"loss": 0.4728, |
|
"step": 3368 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.4920229738353543e-06, |
|
"loss": 0.4451, |
|
"step": 3372 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.488832163369496e-06, |
|
"loss": 0.6876, |
|
"step": 3376 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.4856413529036377e-06, |
|
"loss": 0.4799, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.482450542437779e-06, |
|
"loss": 0.3912, |
|
"step": 3384 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.479259731971921e-06, |
|
"loss": 0.2295, |
|
"step": 3388 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.4760689215060625e-06, |
|
"loss": 0.2529, |
|
"step": 3392 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.4728781110402044e-06, |
|
"loss": 0.454, |
|
"step": 3396 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.469687300574346e-06, |
|
"loss": 0.3894, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.4664964901084878e-06, |
|
"loss": 0.2908, |
|
"step": 3404 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.4633056796426292e-06, |
|
"loss": 0.499, |
|
"step": 3408 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.460114869176771e-06, |
|
"loss": 0.3336, |
|
"step": 3412 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.4569240587109126e-06, |
|
"loss": 0.3016, |
|
"step": 3416 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.4537332482450545e-06, |
|
"loss": 0.4519, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.450542437779196e-06, |
|
"loss": 0.2589, |
|
"step": 3424 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.447351627313338e-06, |
|
"loss": 0.404, |
|
"step": 3428 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.4441608168474793e-06, |
|
"loss": 0.335, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.4409700063816212e-06, |
|
"loss": 0.4312, |
|
"step": 3436 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.437779195915763e-06, |
|
"loss": 0.2877, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.4345883854499046e-06, |
|
"loss": 0.3591, |
|
"step": 3444 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.4313975749840465e-06, |
|
"loss": 0.3149, |
|
"step": 3448 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.428206764518188e-06, |
|
"loss": 0.3785, |
|
"step": 3452 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.4250159540523294e-06, |
|
"loss": 0.3654, |
|
"step": 3456 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.4218251435864713e-06, |
|
"loss": 0.2894, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.4186343331206128e-06, |
|
"loss": 0.5198, |
|
"step": 3464 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.4154435226547547e-06, |
|
"loss": 0.4666, |
|
"step": 3468 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.412252712188896e-06, |
|
"loss": 0.3899, |
|
"step": 3472 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.409061901723038e-06, |
|
"loss": 0.4248, |
|
"step": 3476 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.4058710912571795e-06, |
|
"loss": 0.3144, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.4026802807913214e-06, |
|
"loss": 0.3294, |
|
"step": 3484 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.399489470325463e-06, |
|
"loss": 0.3395, |
|
"step": 3488 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.3962986598596043e-06, |
|
"loss": 0.4384, |
|
"step": 3492 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.3931078493937462e-06, |
|
"loss": 0.3029, |
|
"step": 3496 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.3899170389278877e-06, |
|
"loss": 0.3868, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.3867262284620296e-06, |
|
"loss": 0.233, |
|
"step": 3504 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.383535417996171e-06, |
|
"loss": 0.4025, |
|
"step": 3508 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.380344607530313e-06, |
|
"loss": 0.2714, |
|
"step": 3512 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.3771537970644544e-06, |
|
"loss": 0.4694, |
|
"step": 3516 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.3739629865985963e-06, |
|
"loss": 0.3092, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.370772176132738e-06, |
|
"loss": 0.3375, |
|
"step": 3524 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.3675813656668793e-06, |
|
"loss": 0.2356, |
|
"step": 3528 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.364390555201021e-06, |
|
"loss": 0.4403, |
|
"step": 3532 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.3611997447351626e-06, |
|
"loss": 0.4015, |
|
"step": 3536 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.3580089342693045e-06, |
|
"loss": 0.5201, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.3548181238034464e-06, |
|
"loss": 0.4203, |
|
"step": 3544 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.351627313337588e-06, |
|
"loss": 0.4869, |
|
"step": 3548 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.3484365028717298e-06, |
|
"loss": 0.3923, |
|
"step": 3552 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.3452456924058712e-06, |
|
"loss": 0.6743, |
|
"step": 3556 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.342054881940013e-06, |
|
"loss": 0.2588, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.3388640714741546e-06, |
|
"loss": 0.323, |
|
"step": 3564 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.3356732610082965e-06, |
|
"loss": 0.2859, |
|
"step": 3568 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.332482450542438e-06, |
|
"loss": 0.2747, |
|
"step": 3572 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.32929164007658e-06, |
|
"loss": 0.2221, |
|
"step": 3576 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.3261008296107213e-06, |
|
"loss": 0.3744, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.3229100191448632e-06, |
|
"loss": 0.3965, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.3197192086790047e-06, |
|
"loss": 0.4889, |
|
"step": 3588 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.3165283982131466e-06, |
|
"loss": 0.4218, |
|
"step": 3592 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.313337587747288e-06, |
|
"loss": 0.3016, |
|
"step": 3596 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.3101467772814295e-06, |
|
"loss": 0.3408, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.3069559668155714e-06, |
|
"loss": 0.387, |
|
"step": 3604 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.303765156349713e-06, |
|
"loss": 0.3845, |
|
"step": 3608 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.300574345883855e-06, |
|
"loss": 0.2885, |
|
"step": 3612 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.2973835354179963e-06, |
|
"loss": 0.1871, |
|
"step": 3616 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.294192724952138e-06, |
|
"loss": 0.3516, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.2910019144862796e-06, |
|
"loss": 0.4165, |
|
"step": 3624 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.2878111040204215e-06, |
|
"loss": 0.2891, |
|
"step": 3628 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.284620293554563e-06, |
|
"loss": 0.3616, |
|
"step": 3632 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.2814294830887045e-06, |
|
"loss": 0.4057, |
|
"step": 3636 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.2782386726228464e-06, |
|
"loss": 0.5166, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.275047862156988e-06, |
|
"loss": 0.3279, |
|
"step": 3644 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.2718570516911297e-06, |
|
"loss": 0.3537, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.268666241225271e-06, |
|
"loss": 0.3187, |
|
"step": 3652 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.265475430759413e-06, |
|
"loss": 0.4043, |
|
"step": 3656 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.2622846202935546e-06, |
|
"loss": 0.2799, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.2590938098276964e-06, |
|
"loss": 0.3363, |
|
"step": 3664 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.255902999361838e-06, |
|
"loss": 0.6477, |
|
"step": 3668 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.25271218889598e-06, |
|
"loss": 0.4967, |
|
"step": 3672 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.2495213784301213e-06, |
|
"loss": 0.4474, |
|
"step": 3676 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.246330567964263e-06, |
|
"loss": 0.2501, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.2431397574984046e-06, |
|
"loss": 0.3448, |
|
"step": 3684 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.2399489470325465e-06, |
|
"loss": 0.3084, |
|
"step": 3688 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.2367581365666884e-06, |
|
"loss": 0.3165, |
|
"step": 3692 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.23356732610083e-06, |
|
"loss": 0.405, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.230376515634972e-06, |
|
"loss": 0.3648, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.2271857051691133e-06, |
|
"loss": 0.2938, |
|
"step": 3704 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.2239948947032547e-06, |
|
"loss": 0.336, |
|
"step": 3708 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.2208040842373966e-06, |
|
"loss": 0.4741, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.217613273771538e-06, |
|
"loss": 0.4006, |
|
"step": 3716 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.21442246330568e-06, |
|
"loss": 0.3443, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.2112316528398215e-06, |
|
"loss": 0.2771, |
|
"step": 3724 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.2080408423739634e-06, |
|
"loss": 0.2515, |
|
"step": 3728 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.204850031908105e-06, |
|
"loss": 0.3897, |
|
"step": 3732 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.2016592214422467e-06, |
|
"loss": 0.182, |
|
"step": 3736 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.198468410976388e-06, |
|
"loss": 0.3575, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.1952776005105297e-06, |
|
"loss": 0.3662, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.1920867900446716e-06, |
|
"loss": 0.4394, |
|
"step": 3748 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.188895979578813e-06, |
|
"loss": 0.3541, |
|
"step": 3752 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.185705169112955e-06, |
|
"loss": 0.3837, |
|
"step": 3756 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.1825143586470964e-06, |
|
"loss": 0.2765, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.1793235481812383e-06, |
|
"loss": 0.3349, |
|
"step": 3764 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.1761327377153797e-06, |
|
"loss": 0.3141, |
|
"step": 3768 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.1729419272495216e-06, |
|
"loss": 0.3836, |
|
"step": 3772 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.169751116783663e-06, |
|
"loss": 0.417, |
|
"step": 3776 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.1665603063178046e-06, |
|
"loss": 0.339, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.1633694958519465e-06, |
|
"loss": 0.4287, |
|
"step": 3784 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.1601786853860884e-06, |
|
"loss": 0.3423, |
|
"step": 3788 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.15698787492023e-06, |
|
"loss": 0.3367, |
|
"step": 3792 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.1537970644543717e-06, |
|
"loss": 0.2519, |
|
"step": 3796 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.150606253988513e-06, |
|
"loss": 0.3884, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.147415443522655e-06, |
|
"loss": 0.2767, |
|
"step": 3804 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.1442246330567966e-06, |
|
"loss": 0.3162, |
|
"step": 3808 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.1410338225909385e-06, |
|
"loss": 0.3722, |
|
"step": 3812 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.13784301212508e-06, |
|
"loss": 0.462, |
|
"step": 3816 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.134652201659222e-06, |
|
"loss": 0.4508, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.1314613911933633e-06, |
|
"loss": 0.309, |
|
"step": 3824 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.128270580727505e-06, |
|
"loss": 0.4566, |
|
"step": 3828 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.1250797702616467e-06, |
|
"loss": 0.3216, |
|
"step": 3832 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.1218889597957886e-06, |
|
"loss": 0.4669, |
|
"step": 3836 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.11869814932993e-06, |
|
"loss": 0.4764, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.115507338864072e-06, |
|
"loss": 0.3011, |
|
"step": 3844 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.1123165283982134e-06, |
|
"loss": 0.3308, |
|
"step": 3848 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.109125717932355e-06, |
|
"loss": 0.4038, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.1059349074664967e-06, |
|
"loss": 0.2768, |
|
"step": 3856 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.1027440970006382e-06, |
|
"loss": 0.374, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.09955328653478e-06, |
|
"loss": 0.3393, |
|
"step": 3864 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.0963624760689216e-06, |
|
"loss": 0.3846, |
|
"step": 3868 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0931716656030635e-06, |
|
"loss": 0.308, |
|
"step": 3872 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.089980855137205e-06, |
|
"loss": 0.4816, |
|
"step": 3876 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.086790044671347e-06, |
|
"loss": 0.2121, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0835992342054883e-06, |
|
"loss": 0.3698, |
|
"step": 3884 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0804084237396298e-06, |
|
"loss": 0.3615, |
|
"step": 3888 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.0772176132737717e-06, |
|
"loss": 0.2294, |
|
"step": 3892 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.074026802807913e-06, |
|
"loss": 0.2515, |
|
"step": 3896 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.070835992342055e-06, |
|
"loss": 0.3559, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.0676451818761965e-06, |
|
"loss": 0.4243, |
|
"step": 3904 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.0644543714103384e-06, |
|
"loss": 0.3622, |
|
"step": 3908 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.06126356094448e-06, |
|
"loss": 0.5588, |
|
"step": 3912 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.0580727504786218e-06, |
|
"loss": 0.2169, |
|
"step": 3916 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.0548819400127632e-06, |
|
"loss": 0.4732, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.051691129546905e-06, |
|
"loss": 0.2331, |
|
"step": 3924 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.0485003190810466e-06, |
|
"loss": 0.3388, |
|
"step": 3928 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.0453095086151885e-06, |
|
"loss": 0.4545, |
|
"step": 3932 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.04211869814933e-06, |
|
"loss": 0.3886, |
|
"step": 3936 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.038927887683472e-06, |
|
"loss": 0.2233, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.0357370772176138e-06, |
|
"loss": 0.3658, |
|
"step": 3944 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.0325462667517552e-06, |
|
"loss": 0.3229, |
|
"step": 3948 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.029355456285897e-06, |
|
"loss": 0.1759, |
|
"step": 3952 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.0261646458200386e-06, |
|
"loss": 0.3737, |
|
"step": 3956 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.02297383535418e-06, |
|
"loss": 0.3362, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.019783024888322e-06, |
|
"loss": 0.2873, |
|
"step": 3964 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.0165922144224634e-06, |
|
"loss": 0.3454, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.0134014039566053e-06, |
|
"loss": 0.3428, |
|
"step": 3972 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.0102105934907468e-06, |
|
"loss": 0.4089, |
|
"step": 3976 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.0070197830248887e-06, |
|
"loss": 0.3472, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.00382897255903e-06, |
|
"loss": 0.2868, |
|
"step": 3984 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.000638162093172e-06, |
|
"loss": 0.3088, |
|
"step": 3988 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.9974473516273135e-06, |
|
"loss": 0.2471, |
|
"step": 3992 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.994256541161455e-06, |
|
"loss": 0.2816, |
|
"step": 3996 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.991065730695597e-06, |
|
"loss": 0.3135, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.9878749202297383e-06, |
|
"loss": 0.379, |
|
"step": 4004 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.9846841097638802e-06, |
|
"loss": 0.5225, |
|
"step": 4008 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.9814932992980217e-06, |
|
"loss": 0.3229, |
|
"step": 4012 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.9783024888321636e-06, |
|
"loss": 0.3573, |
|
"step": 4016 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.975111678366305e-06, |
|
"loss": 0.2219, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.971920867900447e-06, |
|
"loss": 0.2133, |
|
"step": 4024 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.9687300574345884e-06, |
|
"loss": 0.4303, |
|
"step": 4028 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.96553924696873e-06, |
|
"loss": 0.4735, |
|
"step": 4032 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.9631461391193364e-06, |
|
"loss": 0.3223, |
|
"step": 4036 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.959955328653478e-06, |
|
"loss": 0.3124, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.95676451818762e-06, |
|
"loss": 0.4547, |
|
"step": 4044 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.9535737077217613e-06, |
|
"loss": 0.3089, |
|
"step": 4048 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.950382897255903e-06, |
|
"loss": 0.344, |
|
"step": 4052 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.947192086790045e-06, |
|
"loss": 0.1488, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.9440012763241865e-06, |
|
"loss": 0.4715, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.9408104658583284e-06, |
|
"loss": 0.2866, |
|
"step": 4064 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.93761965539247e-06, |
|
"loss": 0.3207, |
|
"step": 4068 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.9344288449266118e-06, |
|
"loss": 0.3532, |
|
"step": 4072 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.9312380344607532e-06, |
|
"loss": 0.3416, |
|
"step": 4076 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.928047223994895e-06, |
|
"loss": 0.6239, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.9248564135290366e-06, |
|
"loss": 0.1806, |
|
"step": 4084 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.9216656030631785e-06, |
|
"loss": 0.3065, |
|
"step": 4088 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.91847479259732e-06, |
|
"loss": 0.2393, |
|
"step": 4092 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.9152839821314614e-06, |
|
"loss": 0.4581, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.9120931716656033e-06, |
|
"loss": 0.2407, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.908902361199745e-06, |
|
"loss": 0.3328, |
|
"step": 4104 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.9057115507338867e-06, |
|
"loss": 0.2898, |
|
"step": 4108 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.9025207402680282e-06, |
|
"loss": 0.5888, |
|
"step": 4112 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.89932992980217e-06, |
|
"loss": 0.3909, |
|
"step": 4116 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.8961391193363115e-06, |
|
"loss": 0.2613, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.8929483088704534e-06, |
|
"loss": 0.2594, |
|
"step": 4124 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.889757498404595e-06, |
|
"loss": 0.3601, |
|
"step": 4128 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.8865666879387366e-06, |
|
"loss": 0.1791, |
|
"step": 4132 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.8833758774728783e-06, |
|
"loss": 0.3714, |
|
"step": 4136 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.88018506700702e-06, |
|
"loss": 0.3601, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.8769942565411616e-06, |
|
"loss": 0.4697, |
|
"step": 4144 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.8738034460753033e-06, |
|
"loss": 0.4277, |
|
"step": 4148 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.870612635609445e-06, |
|
"loss": 0.4183, |
|
"step": 4152 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.8674218251435867e-06, |
|
"loss": 0.2764, |
|
"step": 4156 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.8642310146777281e-06, |
|
"loss": 0.3209, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.86104020421187e-06, |
|
"loss": 0.328, |
|
"step": 4164 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.8578493937460115e-06, |
|
"loss": 0.3673, |
|
"step": 4168 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.8546585832801534e-06, |
|
"loss": 0.2856, |
|
"step": 4172 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.8514677728142949e-06, |
|
"loss": 0.4248, |
|
"step": 4176 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.8482769623484368e-06, |
|
"loss": 0.419, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.8450861518825782e-06, |
|
"loss": 0.3315, |
|
"step": 4184 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.8418953414167201e-06, |
|
"loss": 0.3508, |
|
"step": 4188 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.8387045309508616e-06, |
|
"loss": 0.2016, |
|
"step": 4192 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.8355137204850033e-06, |
|
"loss": 0.2352, |
|
"step": 4196 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.832322910019145e-06, |
|
"loss": 0.4638, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.8291320995532866e-06, |
|
"loss": 0.4352, |
|
"step": 4204 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.8259412890874283e-06, |
|
"loss": 0.4832, |
|
"step": 4208 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.82275047862157e-06, |
|
"loss": 0.295, |
|
"step": 4212 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.8195596681557117e-06, |
|
"loss": 0.3176, |
|
"step": 4216 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.8163688576898534e-06, |
|
"loss": 0.0922, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.813178047223995e-06, |
|
"loss": 0.2375, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.8099872367581367e-06, |
|
"loss": 0.3374, |
|
"step": 4228 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.8067964262922782e-06, |
|
"loss": 0.2551, |
|
"step": 4232 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.80360561582642e-06, |
|
"loss": 0.3228, |
|
"step": 4236 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.8004148053605616e-06, |
|
"loss": 0.3102, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7972239948947035e-06, |
|
"loss": 0.2471, |
|
"step": 4244 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.794033184428845e-06, |
|
"loss": 0.285, |
|
"step": 4248 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7908423739629868e-06, |
|
"loss": 0.3468, |
|
"step": 4252 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7876515634971283e-06, |
|
"loss": 0.2877, |
|
"step": 4256 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.7844607530312702e-06, |
|
"loss": 0.4362, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.7812699425654117e-06, |
|
"loss": 0.1789, |
|
"step": 4264 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.7780791320995533e-06, |
|
"loss": 0.3056, |
|
"step": 4268 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.774888321633695e-06, |
|
"loss": 0.478, |
|
"step": 4272 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.7716975111678367e-06, |
|
"loss": 0.3405, |
|
"step": 4276 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.7685067007019786e-06, |
|
"loss": 0.2038, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.76531589023612e-06, |
|
"loss": 0.2301, |
|
"step": 4284 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.762125079770262e-06, |
|
"loss": 0.3283, |
|
"step": 4288 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.7589342693044034e-06, |
|
"loss": 0.1711, |
|
"step": 4292 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.7557434588385453e-06, |
|
"loss": 0.241, |
|
"step": 4296 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.7525526483726868e-06, |
|
"loss": 0.2408, |
|
"step": 4300 |
|
} |
|
], |
|
"logging_steps": 4, |
|
"max_steps": 6468, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 43550404509696.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|