zephyr-math / checkpoint-18384 /trainer_state.json
rishiraj's picture
Upload folder using huggingface_hub
c3a18e8
raw
history blame
224 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 18384,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 3.2608695652173915e-07,
"loss": 1.057,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 6.884057971014494e-07,
"loss": 1.0404,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 1.0144927536231885e-06,
"loss": 1.0415,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 1.3405797101449278e-06,
"loss": 1.0467,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 1.7028985507246378e-06,
"loss": 1.0394,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 2.065217391304348e-06,
"loss": 1.0325,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 2.4275362318840583e-06,
"loss": 1.0236,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 2.7898550724637686e-06,
"loss": 1.0099,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 3.152173913043479e-06,
"loss": 1.024,
"step": 90
},
{
"epoch": 0.02,
"learning_rate": 3.5144927536231887e-06,
"loss": 0.9877,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 3.8768115942028985e-06,
"loss": 0.9921,
"step": 110
},
{
"epoch": 0.02,
"learning_rate": 4.239130434782609e-06,
"loss": 0.982,
"step": 120
},
{
"epoch": 0.02,
"learning_rate": 4.601449275362319e-06,
"loss": 0.9623,
"step": 130
},
{
"epoch": 0.02,
"learning_rate": 4.963768115942029e-06,
"loss": 0.9441,
"step": 140
},
{
"epoch": 0.02,
"learning_rate": 5.3260869565217395e-06,
"loss": 0.9225,
"step": 150
},
{
"epoch": 0.03,
"learning_rate": 5.688405797101449e-06,
"loss": 0.9129,
"step": 160
},
{
"epoch": 0.03,
"learning_rate": 6.05072463768116e-06,
"loss": 0.8737,
"step": 170
},
{
"epoch": 0.03,
"learning_rate": 6.41304347826087e-06,
"loss": 0.8554,
"step": 180
},
{
"epoch": 0.03,
"learning_rate": 6.7753623188405805e-06,
"loss": 0.8249,
"step": 190
},
{
"epoch": 0.03,
"learning_rate": 7.13768115942029e-06,
"loss": 0.796,
"step": 200
},
{
"epoch": 0.03,
"learning_rate": 7.500000000000001e-06,
"loss": 0.7771,
"step": 210
},
{
"epoch": 0.04,
"learning_rate": 7.862318840579712e-06,
"loss": 0.7432,
"step": 220
},
{
"epoch": 0.04,
"learning_rate": 8.22463768115942e-06,
"loss": 0.7279,
"step": 230
},
{
"epoch": 0.04,
"learning_rate": 8.586956521739131e-06,
"loss": 0.7114,
"step": 240
},
{
"epoch": 0.04,
"learning_rate": 8.94927536231884e-06,
"loss": 0.6932,
"step": 250
},
{
"epoch": 0.04,
"learning_rate": 9.275362318840581e-06,
"loss": 0.726,
"step": 260
},
{
"epoch": 0.04,
"learning_rate": 9.63768115942029e-06,
"loss": 0.6796,
"step": 270
},
{
"epoch": 0.05,
"learning_rate": 1e-05,
"loss": 0.6754,
"step": 280
},
{
"epoch": 0.05,
"learning_rate": 1.036231884057971e-05,
"loss": 0.6558,
"step": 290
},
{
"epoch": 0.05,
"learning_rate": 1.0724637681159422e-05,
"loss": 0.6553,
"step": 300
},
{
"epoch": 0.05,
"learning_rate": 1.1086956521739131e-05,
"loss": 0.6355,
"step": 310
},
{
"epoch": 0.05,
"learning_rate": 1.1449275362318842e-05,
"loss": 0.6364,
"step": 320
},
{
"epoch": 0.05,
"learning_rate": 1.181159420289855e-05,
"loss": 0.6412,
"step": 330
},
{
"epoch": 0.06,
"learning_rate": 1.2173913043478263e-05,
"loss": 0.6304,
"step": 340
},
{
"epoch": 0.06,
"learning_rate": 1.2536231884057972e-05,
"loss": 0.6308,
"step": 350
},
{
"epoch": 0.06,
"learning_rate": 1.2898550724637681e-05,
"loss": 0.6139,
"step": 360
},
{
"epoch": 0.06,
"learning_rate": 1.3260869565217392e-05,
"loss": 0.6292,
"step": 370
},
{
"epoch": 0.06,
"learning_rate": 1.3623188405797103e-05,
"loss": 0.6259,
"step": 380
},
{
"epoch": 0.06,
"learning_rate": 1.3985507246376813e-05,
"loss": 0.6165,
"step": 390
},
{
"epoch": 0.07,
"learning_rate": 1.4347826086956522e-05,
"loss": 0.6156,
"step": 400
},
{
"epoch": 0.07,
"learning_rate": 1.4710144927536235e-05,
"loss": 0.622,
"step": 410
},
{
"epoch": 0.07,
"learning_rate": 1.5072463768115944e-05,
"loss": 0.6195,
"step": 420
},
{
"epoch": 0.07,
"learning_rate": 1.5434782608695654e-05,
"loss": 0.612,
"step": 430
},
{
"epoch": 0.07,
"learning_rate": 1.5797101449275363e-05,
"loss": 0.6231,
"step": 440
},
{
"epoch": 0.07,
"learning_rate": 1.6159420289855076e-05,
"loss": 0.6172,
"step": 450
},
{
"epoch": 0.08,
"learning_rate": 1.6521739130434785e-05,
"loss": 0.6111,
"step": 460
},
{
"epoch": 0.08,
"learning_rate": 1.6884057971014494e-05,
"loss": 0.5962,
"step": 470
},
{
"epoch": 0.08,
"learning_rate": 1.7246376811594206e-05,
"loss": 0.621,
"step": 480
},
{
"epoch": 0.08,
"learning_rate": 1.7608695652173915e-05,
"loss": 0.5874,
"step": 490
},
{
"epoch": 0.08,
"learning_rate": 1.7971014492753624e-05,
"loss": 0.5981,
"step": 500
},
{
"epoch": 0.08,
"learning_rate": 1.8333333333333333e-05,
"loss": 0.6084,
"step": 510
},
{
"epoch": 0.08,
"learning_rate": 1.8695652173913045e-05,
"loss": 0.6028,
"step": 520
},
{
"epoch": 0.09,
"learning_rate": 1.9057971014492754e-05,
"loss": 0.6004,
"step": 530
},
{
"epoch": 0.09,
"learning_rate": 1.9420289855072467e-05,
"loss": 0.5926,
"step": 540
},
{
"epoch": 0.09,
"learning_rate": 1.9782608695652176e-05,
"loss": 0.5886,
"step": 550
},
{
"epoch": 0.09,
"learning_rate": 1.9995513683266043e-05,
"loss": 0.5958,
"step": 560
},
{
"epoch": 0.09,
"learning_rate": 1.9984297891431138e-05,
"loss": 0.596,
"step": 570
},
{
"epoch": 0.09,
"learning_rate": 1.9973082099596232e-05,
"loss": 0.5946,
"step": 580
},
{
"epoch": 0.1,
"learning_rate": 1.996186630776133e-05,
"loss": 0.6011,
"step": 590
},
{
"epoch": 0.1,
"learning_rate": 1.9950650515926425e-05,
"loss": 0.5956,
"step": 600
},
{
"epoch": 0.1,
"learning_rate": 1.9939434724091523e-05,
"loss": 0.5838,
"step": 610
},
{
"epoch": 0.1,
"learning_rate": 1.992821893225662e-05,
"loss": 0.5925,
"step": 620
},
{
"epoch": 0.1,
"learning_rate": 1.9917003140421716e-05,
"loss": 0.5697,
"step": 630
},
{
"epoch": 0.1,
"learning_rate": 1.990578734858681e-05,
"loss": 0.5766,
"step": 640
},
{
"epoch": 0.11,
"learning_rate": 1.989457155675191e-05,
"loss": 0.5821,
"step": 650
},
{
"epoch": 0.11,
"learning_rate": 1.9883355764917006e-05,
"loss": 0.5945,
"step": 660
},
{
"epoch": 0.11,
"learning_rate": 1.98721399730821e-05,
"loss": 0.5798,
"step": 670
},
{
"epoch": 0.11,
"learning_rate": 1.98609241812472e-05,
"loss": 0.5778,
"step": 680
},
{
"epoch": 0.11,
"learning_rate": 1.9849708389412294e-05,
"loss": 0.5762,
"step": 690
},
{
"epoch": 0.11,
"learning_rate": 1.983849259757739e-05,
"loss": 0.5755,
"step": 700
},
{
"epoch": 0.12,
"learning_rate": 1.9827276805742487e-05,
"loss": 0.5721,
"step": 710
},
{
"epoch": 0.12,
"learning_rate": 1.9816061013907585e-05,
"loss": 0.5892,
"step": 720
},
{
"epoch": 0.12,
"learning_rate": 1.980484522207268e-05,
"loss": 0.5829,
"step": 730
},
{
"epoch": 0.12,
"learning_rate": 1.9793629430237777e-05,
"loss": 0.5775,
"step": 740
},
{
"epoch": 0.12,
"learning_rate": 1.9782413638402872e-05,
"loss": 0.5792,
"step": 750
},
{
"epoch": 0.12,
"learning_rate": 1.977119784656797e-05,
"loss": 0.579,
"step": 760
},
{
"epoch": 0.13,
"learning_rate": 1.9759982054733068e-05,
"loss": 0.5767,
"step": 770
},
{
"epoch": 0.13,
"learning_rate": 1.9748766262898163e-05,
"loss": 0.5719,
"step": 780
},
{
"epoch": 0.13,
"learning_rate": 1.9737550471063257e-05,
"loss": 0.5804,
"step": 790
},
{
"epoch": 0.13,
"learning_rate": 1.9726334679228355e-05,
"loss": 0.5908,
"step": 800
},
{
"epoch": 0.13,
"learning_rate": 1.971511888739345e-05,
"loss": 0.5737,
"step": 810
},
{
"epoch": 0.13,
"learning_rate": 1.9703903095558548e-05,
"loss": 0.5715,
"step": 820
},
{
"epoch": 0.14,
"learning_rate": 1.9692687303723646e-05,
"loss": 0.5866,
"step": 830
},
{
"epoch": 0.14,
"learning_rate": 1.968147151188874e-05,
"loss": 0.5663,
"step": 840
},
{
"epoch": 0.14,
"learning_rate": 1.9670255720053836e-05,
"loss": 0.5799,
"step": 850
},
{
"epoch": 0.14,
"learning_rate": 1.9659039928218934e-05,
"loss": 0.5682,
"step": 860
},
{
"epoch": 0.14,
"learning_rate": 1.964782413638403e-05,
"loss": 0.5795,
"step": 870
},
{
"epoch": 0.14,
"learning_rate": 1.9636608344549126e-05,
"loss": 0.5683,
"step": 880
},
{
"epoch": 0.15,
"learning_rate": 1.9625392552714224e-05,
"loss": 0.5683,
"step": 890
},
{
"epoch": 0.15,
"learning_rate": 1.961417676087932e-05,
"loss": 0.5645,
"step": 900
},
{
"epoch": 0.15,
"learning_rate": 1.9602960969044417e-05,
"loss": 0.572,
"step": 910
},
{
"epoch": 0.15,
"learning_rate": 1.9591745177209515e-05,
"loss": 0.5552,
"step": 920
},
{
"epoch": 0.15,
"learning_rate": 1.958052938537461e-05,
"loss": 0.5713,
"step": 930
},
{
"epoch": 0.15,
"learning_rate": 1.9569313593539704e-05,
"loss": 0.5688,
"step": 940
},
{
"epoch": 0.16,
"learning_rate": 1.9558097801704803e-05,
"loss": 0.5666,
"step": 950
},
{
"epoch": 0.16,
"learning_rate": 1.9546882009869897e-05,
"loss": 0.566,
"step": 960
},
{
"epoch": 0.16,
"learning_rate": 1.9535666218034995e-05,
"loss": 0.5658,
"step": 970
},
{
"epoch": 0.16,
"learning_rate": 1.9524450426200093e-05,
"loss": 0.5718,
"step": 980
},
{
"epoch": 0.16,
"learning_rate": 1.9513234634365188e-05,
"loss": 0.5559,
"step": 990
},
{
"epoch": 0.16,
"learning_rate": 1.9502018842530283e-05,
"loss": 0.5644,
"step": 1000
},
{
"epoch": 0.16,
"learning_rate": 1.949080305069538e-05,
"loss": 0.5648,
"step": 1010
},
{
"epoch": 0.17,
"learning_rate": 1.947958725886048e-05,
"loss": 0.562,
"step": 1020
},
{
"epoch": 0.17,
"learning_rate": 1.9468371467025573e-05,
"loss": 0.5652,
"step": 1030
},
{
"epoch": 0.17,
"learning_rate": 1.945715567519067e-05,
"loss": 0.5679,
"step": 1040
},
{
"epoch": 0.17,
"learning_rate": 1.9445939883355766e-05,
"loss": 0.5643,
"step": 1050
},
{
"epoch": 0.17,
"learning_rate": 1.943472409152086e-05,
"loss": 0.5566,
"step": 1060
},
{
"epoch": 0.17,
"learning_rate": 1.942350829968596e-05,
"loss": 0.566,
"step": 1070
},
{
"epoch": 0.18,
"learning_rate": 1.9412292507851057e-05,
"loss": 0.5593,
"step": 1080
},
{
"epoch": 0.18,
"learning_rate": 1.940107671601615e-05,
"loss": 0.5719,
"step": 1090
},
{
"epoch": 0.18,
"learning_rate": 1.938986092418125e-05,
"loss": 0.5686,
"step": 1100
},
{
"epoch": 0.18,
"learning_rate": 1.9378645132346344e-05,
"loss": 0.5606,
"step": 1110
},
{
"epoch": 0.18,
"learning_rate": 1.9367429340511442e-05,
"loss": 0.5578,
"step": 1120
},
{
"epoch": 0.18,
"learning_rate": 1.935621354867654e-05,
"loss": 0.5519,
"step": 1130
},
{
"epoch": 0.19,
"learning_rate": 1.9344997756841635e-05,
"loss": 0.5563,
"step": 1140
},
{
"epoch": 0.19,
"learning_rate": 1.933378196500673e-05,
"loss": 0.5595,
"step": 1150
},
{
"epoch": 0.19,
"learning_rate": 1.9322566173171828e-05,
"loss": 0.55,
"step": 1160
},
{
"epoch": 0.19,
"learning_rate": 1.9311350381336926e-05,
"loss": 0.5602,
"step": 1170
},
{
"epoch": 0.19,
"learning_rate": 1.930013458950202e-05,
"loss": 0.5553,
"step": 1180
},
{
"epoch": 0.19,
"learning_rate": 1.928891879766712e-05,
"loss": 0.5724,
"step": 1190
},
{
"epoch": 0.2,
"learning_rate": 1.9277703005832213e-05,
"loss": 0.5535,
"step": 1200
},
{
"epoch": 0.2,
"learning_rate": 1.9266487213997308e-05,
"loss": 0.5615,
"step": 1210
},
{
"epoch": 0.2,
"learning_rate": 1.9255271422162406e-05,
"loss": 0.5656,
"step": 1220
},
{
"epoch": 0.2,
"learning_rate": 1.9244055630327504e-05,
"loss": 0.5501,
"step": 1230
},
{
"epoch": 0.2,
"learning_rate": 1.92328398384926e-05,
"loss": 0.5659,
"step": 1240
},
{
"epoch": 0.2,
"learning_rate": 1.9221624046657697e-05,
"loss": 0.5625,
"step": 1250
},
{
"epoch": 0.21,
"learning_rate": 1.921040825482279e-05,
"loss": 0.5602,
"step": 1260
},
{
"epoch": 0.21,
"learning_rate": 1.919919246298789e-05,
"loss": 0.5519,
"step": 1270
},
{
"epoch": 0.21,
"learning_rate": 1.9187976671152987e-05,
"loss": 0.549,
"step": 1280
},
{
"epoch": 0.21,
"learning_rate": 1.9176760879318082e-05,
"loss": 0.5636,
"step": 1290
},
{
"epoch": 0.21,
"learning_rate": 1.9165545087483177e-05,
"loss": 0.553,
"step": 1300
},
{
"epoch": 0.21,
"learning_rate": 1.9154329295648275e-05,
"loss": 0.5597,
"step": 1310
},
{
"epoch": 0.22,
"learning_rate": 1.914311350381337e-05,
"loss": 0.5419,
"step": 1320
},
{
"epoch": 0.22,
"learning_rate": 1.9131897711978467e-05,
"loss": 0.5368,
"step": 1330
},
{
"epoch": 0.22,
"learning_rate": 1.9120681920143566e-05,
"loss": 0.555,
"step": 1340
},
{
"epoch": 0.22,
"learning_rate": 1.910946612830866e-05,
"loss": 0.5579,
"step": 1350
},
{
"epoch": 0.22,
"learning_rate": 1.9098250336473755e-05,
"loss": 0.5479,
"step": 1360
},
{
"epoch": 0.22,
"learning_rate": 1.9087034544638853e-05,
"loss": 0.563,
"step": 1370
},
{
"epoch": 0.23,
"learning_rate": 1.907581875280395e-05,
"loss": 0.5491,
"step": 1380
},
{
"epoch": 0.23,
"learning_rate": 1.9064602960969046e-05,
"loss": 0.5491,
"step": 1390
},
{
"epoch": 0.23,
"learning_rate": 1.9053387169134144e-05,
"loss": 0.5511,
"step": 1400
},
{
"epoch": 0.23,
"learning_rate": 1.904217137729924e-05,
"loss": 0.5562,
"step": 1410
},
{
"epoch": 0.23,
"learning_rate": 1.9030955585464333e-05,
"loss": 0.5568,
"step": 1420
},
{
"epoch": 0.23,
"learning_rate": 1.901973979362943e-05,
"loss": 0.5497,
"step": 1430
},
{
"epoch": 0.23,
"learning_rate": 1.900852400179453e-05,
"loss": 0.5571,
"step": 1440
},
{
"epoch": 0.24,
"learning_rate": 1.8997308209959624e-05,
"loss": 0.5538,
"step": 1450
},
{
"epoch": 0.24,
"learning_rate": 1.8986092418124722e-05,
"loss": 0.5479,
"step": 1460
},
{
"epoch": 0.24,
"learning_rate": 1.8974876626289816e-05,
"loss": 0.556,
"step": 1470
},
{
"epoch": 0.24,
"learning_rate": 1.8963660834454915e-05,
"loss": 0.5588,
"step": 1480
},
{
"epoch": 0.24,
"learning_rate": 1.8952445042620013e-05,
"loss": 0.5358,
"step": 1490
},
{
"epoch": 0.24,
"learning_rate": 1.8941229250785107e-05,
"loss": 0.5424,
"step": 1500
},
{
"epoch": 0.25,
"learning_rate": 1.8930013458950202e-05,
"loss": 0.5487,
"step": 1510
},
{
"epoch": 0.25,
"learning_rate": 1.89187976671153e-05,
"loss": 0.5383,
"step": 1520
},
{
"epoch": 0.25,
"learning_rate": 1.8907581875280398e-05,
"loss": 0.5493,
"step": 1530
},
{
"epoch": 0.25,
"learning_rate": 1.8896366083445493e-05,
"loss": 0.5604,
"step": 1540
},
{
"epoch": 0.25,
"learning_rate": 1.888515029161059e-05,
"loss": 0.5501,
"step": 1550
},
{
"epoch": 0.25,
"learning_rate": 1.8873934499775685e-05,
"loss": 0.5419,
"step": 1560
},
{
"epoch": 0.26,
"learning_rate": 1.886271870794078e-05,
"loss": 0.5444,
"step": 1570
},
{
"epoch": 0.26,
"learning_rate": 1.8851502916105878e-05,
"loss": 0.5508,
"step": 1580
},
{
"epoch": 0.26,
"learning_rate": 1.8840287124270976e-05,
"loss": 0.5488,
"step": 1590
},
{
"epoch": 0.26,
"learning_rate": 1.882907133243607e-05,
"loss": 0.5653,
"step": 1600
},
{
"epoch": 0.26,
"learning_rate": 1.881785554060117e-05,
"loss": 0.5458,
"step": 1610
},
{
"epoch": 0.26,
"learning_rate": 1.8806639748766264e-05,
"loss": 0.5548,
"step": 1620
},
{
"epoch": 0.27,
"learning_rate": 1.879542395693136e-05,
"loss": 0.5379,
"step": 1630
},
{
"epoch": 0.27,
"learning_rate": 1.878420816509646e-05,
"loss": 0.5567,
"step": 1640
},
{
"epoch": 0.27,
"learning_rate": 1.8772992373261554e-05,
"loss": 0.5523,
"step": 1650
},
{
"epoch": 0.27,
"learning_rate": 1.876177658142665e-05,
"loss": 0.5509,
"step": 1660
},
{
"epoch": 0.27,
"learning_rate": 1.8750560789591747e-05,
"loss": 0.5523,
"step": 1670
},
{
"epoch": 0.27,
"learning_rate": 1.873934499775684e-05,
"loss": 0.5454,
"step": 1680
},
{
"epoch": 0.28,
"learning_rate": 1.872812920592194e-05,
"loss": 0.5513,
"step": 1690
},
{
"epoch": 0.28,
"learning_rate": 1.8716913414087038e-05,
"loss": 0.5462,
"step": 1700
},
{
"epoch": 0.28,
"learning_rate": 1.8705697622252132e-05,
"loss": 0.5468,
"step": 1710
},
{
"epoch": 0.28,
"learning_rate": 1.8694481830417227e-05,
"loss": 0.5398,
"step": 1720
},
{
"epoch": 0.28,
"learning_rate": 1.8683266038582325e-05,
"loss": 0.5456,
"step": 1730
},
{
"epoch": 0.28,
"learning_rate": 1.8672050246747423e-05,
"loss": 0.5361,
"step": 1740
},
{
"epoch": 0.29,
"learning_rate": 1.8660834454912518e-05,
"loss": 0.5508,
"step": 1750
},
{
"epoch": 0.29,
"learning_rate": 1.8649618663077616e-05,
"loss": 0.5339,
"step": 1760
},
{
"epoch": 0.29,
"learning_rate": 1.863840287124271e-05,
"loss": 0.5403,
"step": 1770
},
{
"epoch": 0.29,
"learning_rate": 1.8627187079407805e-05,
"loss": 0.548,
"step": 1780
},
{
"epoch": 0.29,
"learning_rate": 1.8615971287572903e-05,
"loss": 0.5424,
"step": 1790
},
{
"epoch": 0.29,
"learning_rate": 1.8604755495738e-05,
"loss": 0.5378,
"step": 1800
},
{
"epoch": 0.3,
"learning_rate": 1.8593539703903096e-05,
"loss": 0.5549,
"step": 1810
},
{
"epoch": 0.3,
"learning_rate": 1.8582323912068194e-05,
"loss": 0.5467,
"step": 1820
},
{
"epoch": 0.3,
"learning_rate": 1.857110812023329e-05,
"loss": 0.5449,
"step": 1830
},
{
"epoch": 0.3,
"learning_rate": 1.8559892328398387e-05,
"loss": 0.545,
"step": 1840
},
{
"epoch": 0.3,
"learning_rate": 1.8548676536563485e-05,
"loss": 0.5476,
"step": 1850
},
{
"epoch": 0.3,
"learning_rate": 1.853746074472858e-05,
"loss": 0.5323,
"step": 1860
},
{
"epoch": 0.31,
"learning_rate": 1.8526244952893674e-05,
"loss": 0.5532,
"step": 1870
},
{
"epoch": 0.31,
"learning_rate": 1.8515029161058772e-05,
"loss": 0.5405,
"step": 1880
},
{
"epoch": 0.31,
"learning_rate": 1.850381336922387e-05,
"loss": 0.5494,
"step": 1890
},
{
"epoch": 0.31,
"learning_rate": 1.8492597577388965e-05,
"loss": 0.5471,
"step": 1900
},
{
"epoch": 0.31,
"learning_rate": 1.8481381785554063e-05,
"loss": 0.5387,
"step": 1910
},
{
"epoch": 0.31,
"learning_rate": 1.8470165993719158e-05,
"loss": 0.5436,
"step": 1920
},
{
"epoch": 0.31,
"learning_rate": 1.8458950201884252e-05,
"loss": 0.5469,
"step": 1930
},
{
"epoch": 0.32,
"learning_rate": 1.844773441004935e-05,
"loss": 0.529,
"step": 1940
},
{
"epoch": 0.32,
"learning_rate": 1.843651861821445e-05,
"loss": 0.5439,
"step": 1950
},
{
"epoch": 0.32,
"learning_rate": 1.8425302826379543e-05,
"loss": 0.5408,
"step": 1960
},
{
"epoch": 0.32,
"learning_rate": 1.841408703454464e-05,
"loss": 0.5361,
"step": 1970
},
{
"epoch": 0.32,
"learning_rate": 1.8402871242709736e-05,
"loss": 0.5388,
"step": 1980
},
{
"epoch": 0.32,
"learning_rate": 1.8391655450874834e-05,
"loss": 0.5369,
"step": 1990
},
{
"epoch": 0.33,
"learning_rate": 1.8380439659039932e-05,
"loss": 0.5366,
"step": 2000
},
{
"epoch": 0.33,
"learning_rate": 1.8369223867205027e-05,
"loss": 0.5493,
"step": 2010
},
{
"epoch": 0.33,
"learning_rate": 1.835800807537012e-05,
"loss": 0.5346,
"step": 2020
},
{
"epoch": 0.33,
"learning_rate": 1.834679228353522e-05,
"loss": 0.5455,
"step": 2030
},
{
"epoch": 0.33,
"learning_rate": 1.8335576491700314e-05,
"loss": 0.5452,
"step": 2040
},
{
"epoch": 0.33,
"learning_rate": 1.8324360699865412e-05,
"loss": 0.5398,
"step": 2050
},
{
"epoch": 0.34,
"learning_rate": 1.831314490803051e-05,
"loss": 0.5558,
"step": 2060
},
{
"epoch": 0.34,
"learning_rate": 1.8301929116195605e-05,
"loss": 0.5367,
"step": 2070
},
{
"epoch": 0.34,
"learning_rate": 1.82907133243607e-05,
"loss": 0.5377,
"step": 2080
},
{
"epoch": 0.34,
"learning_rate": 1.8279497532525797e-05,
"loss": 0.5487,
"step": 2090
},
{
"epoch": 0.34,
"learning_rate": 1.8268281740690895e-05,
"loss": 0.5455,
"step": 2100
},
{
"epoch": 0.34,
"learning_rate": 1.825706594885599e-05,
"loss": 0.5506,
"step": 2110
},
{
"epoch": 0.35,
"learning_rate": 1.8245850157021088e-05,
"loss": 0.533,
"step": 2120
},
{
"epoch": 0.35,
"learning_rate": 1.8234634365186183e-05,
"loss": 0.5375,
"step": 2130
},
{
"epoch": 0.35,
"learning_rate": 1.8223418573351277e-05,
"loss": 0.5261,
"step": 2140
},
{
"epoch": 0.35,
"learning_rate": 1.8212202781516376e-05,
"loss": 0.5478,
"step": 2150
},
{
"epoch": 0.35,
"learning_rate": 1.8200986989681474e-05,
"loss": 0.5414,
"step": 2160
},
{
"epoch": 0.35,
"learning_rate": 1.8189771197846568e-05,
"loss": 0.5471,
"step": 2170
},
{
"epoch": 0.36,
"learning_rate": 1.8178555406011666e-05,
"loss": 0.53,
"step": 2180
},
{
"epoch": 0.36,
"learning_rate": 1.816733961417676e-05,
"loss": 0.5265,
"step": 2190
},
{
"epoch": 0.36,
"learning_rate": 1.815612382234186e-05,
"loss": 0.5309,
"step": 2200
},
{
"epoch": 0.36,
"learning_rate": 1.8144908030506957e-05,
"loss": 0.5479,
"step": 2210
},
{
"epoch": 0.36,
"learning_rate": 1.8133692238672052e-05,
"loss": 0.546,
"step": 2220
},
{
"epoch": 0.36,
"learning_rate": 1.8122476446837146e-05,
"loss": 0.5355,
"step": 2230
},
{
"epoch": 0.37,
"learning_rate": 1.8111260655002244e-05,
"loss": 0.5308,
"step": 2240
},
{
"epoch": 0.37,
"learning_rate": 1.8100044863167343e-05,
"loss": 0.5306,
"step": 2250
},
{
"epoch": 0.37,
"learning_rate": 1.8088829071332437e-05,
"loss": 0.54,
"step": 2260
},
{
"epoch": 0.37,
"learning_rate": 1.8077613279497535e-05,
"loss": 0.5279,
"step": 2270
},
{
"epoch": 0.37,
"learning_rate": 1.806639748766263e-05,
"loss": 0.5311,
"step": 2280
},
{
"epoch": 0.37,
"learning_rate": 1.8055181695827725e-05,
"loss": 0.5315,
"step": 2290
},
{
"epoch": 0.38,
"learning_rate": 1.8043965903992823e-05,
"loss": 0.5456,
"step": 2300
},
{
"epoch": 0.38,
"learning_rate": 1.803275011215792e-05,
"loss": 0.5342,
"step": 2310
},
{
"epoch": 0.38,
"learning_rate": 1.8021534320323015e-05,
"loss": 0.5445,
"step": 2320
},
{
"epoch": 0.38,
"learning_rate": 1.8010318528488113e-05,
"loss": 0.5295,
"step": 2330
},
{
"epoch": 0.38,
"learning_rate": 1.7999102736653208e-05,
"loss": 0.5337,
"step": 2340
},
{
"epoch": 0.38,
"learning_rate": 1.7987886944818306e-05,
"loss": 0.5375,
"step": 2350
},
{
"epoch": 0.39,
"learning_rate": 1.7976671152983404e-05,
"loss": 0.5292,
"step": 2360
},
{
"epoch": 0.39,
"learning_rate": 1.79654553611485e-05,
"loss": 0.5365,
"step": 2370
},
{
"epoch": 0.39,
"learning_rate": 1.7954239569313593e-05,
"loss": 0.5428,
"step": 2380
},
{
"epoch": 0.39,
"learning_rate": 1.794302377747869e-05,
"loss": 0.5489,
"step": 2390
},
{
"epoch": 0.39,
"learning_rate": 1.7931807985643786e-05,
"loss": 0.5344,
"step": 2400
},
{
"epoch": 0.39,
"learning_rate": 1.7920592193808884e-05,
"loss": 0.5293,
"step": 2410
},
{
"epoch": 0.39,
"learning_rate": 1.7909376401973982e-05,
"loss": 0.5282,
"step": 2420
},
{
"epoch": 0.4,
"learning_rate": 1.7898160610139077e-05,
"loss": 0.5259,
"step": 2430
},
{
"epoch": 0.4,
"learning_rate": 1.788694481830417e-05,
"loss": 0.5316,
"step": 2440
},
{
"epoch": 0.4,
"learning_rate": 1.787572902646927e-05,
"loss": 0.5329,
"step": 2450
},
{
"epoch": 0.4,
"learning_rate": 1.7864513234634368e-05,
"loss": 0.515,
"step": 2460
},
{
"epoch": 0.4,
"learning_rate": 1.7853297442799462e-05,
"loss": 0.5393,
"step": 2470
},
{
"epoch": 0.4,
"learning_rate": 1.784208165096456e-05,
"loss": 0.5301,
"step": 2480
},
{
"epoch": 0.41,
"learning_rate": 1.7830865859129655e-05,
"loss": 0.5246,
"step": 2490
},
{
"epoch": 0.41,
"learning_rate": 1.7819650067294753e-05,
"loss": 0.5313,
"step": 2500
},
{
"epoch": 0.41,
"learning_rate": 1.7808434275459848e-05,
"loss": 0.5329,
"step": 2510
},
{
"epoch": 0.41,
"learning_rate": 1.7797218483624946e-05,
"loss": 0.5419,
"step": 2520
},
{
"epoch": 0.41,
"learning_rate": 1.778600269179004e-05,
"loss": 0.5322,
"step": 2530
},
{
"epoch": 0.41,
"learning_rate": 1.777478689995514e-05,
"loss": 0.5385,
"step": 2540
},
{
"epoch": 0.42,
"learning_rate": 1.7763571108120233e-05,
"loss": 0.5218,
"step": 2550
},
{
"epoch": 0.42,
"learning_rate": 1.775235531628533e-05,
"loss": 0.5205,
"step": 2560
},
{
"epoch": 0.42,
"learning_rate": 1.774113952445043e-05,
"loss": 0.5293,
"step": 2570
},
{
"epoch": 0.42,
"learning_rate": 1.7729923732615524e-05,
"loss": 0.5289,
"step": 2580
},
{
"epoch": 0.42,
"learning_rate": 1.771870794078062e-05,
"loss": 0.5492,
"step": 2590
},
{
"epoch": 0.42,
"learning_rate": 1.7707492148945717e-05,
"loss": 0.5246,
"step": 2600
},
{
"epoch": 0.43,
"learning_rate": 1.7696276357110815e-05,
"loss": 0.5331,
"step": 2610
},
{
"epoch": 0.43,
"learning_rate": 1.768506056527591e-05,
"loss": 0.5423,
"step": 2620
},
{
"epoch": 0.43,
"learning_rate": 1.7673844773441007e-05,
"loss": 0.533,
"step": 2630
},
{
"epoch": 0.43,
"learning_rate": 1.7662628981606102e-05,
"loss": 0.538,
"step": 2640
},
{
"epoch": 0.43,
"learning_rate": 1.7651413189771197e-05,
"loss": 0.5277,
"step": 2650
},
{
"epoch": 0.43,
"learning_rate": 1.7640197397936295e-05,
"loss": 0.5307,
"step": 2660
},
{
"epoch": 0.44,
"learning_rate": 1.7628981606101393e-05,
"loss": 0.5279,
"step": 2670
},
{
"epoch": 0.44,
"learning_rate": 1.7617765814266488e-05,
"loss": 0.5358,
"step": 2680
},
{
"epoch": 0.44,
"learning_rate": 1.7606550022431586e-05,
"loss": 0.5265,
"step": 2690
},
{
"epoch": 0.44,
"learning_rate": 1.759533423059668e-05,
"loss": 0.5285,
"step": 2700
},
{
"epoch": 0.44,
"learning_rate": 1.758411843876178e-05,
"loss": 0.5303,
"step": 2710
},
{
"epoch": 0.44,
"learning_rate": 1.7572902646926876e-05,
"loss": 0.5431,
"step": 2720
},
{
"epoch": 0.45,
"learning_rate": 1.756168685509197e-05,
"loss": 0.5312,
"step": 2730
},
{
"epoch": 0.45,
"learning_rate": 1.7550471063257066e-05,
"loss": 0.5296,
"step": 2740
},
{
"epoch": 0.45,
"learning_rate": 1.7539255271422164e-05,
"loss": 0.5282,
"step": 2750
},
{
"epoch": 0.45,
"learning_rate": 1.752803947958726e-05,
"loss": 0.5378,
"step": 2760
},
{
"epoch": 0.45,
"learning_rate": 1.7516823687752356e-05,
"loss": 0.5372,
"step": 2770
},
{
"epoch": 0.45,
"learning_rate": 1.7505607895917455e-05,
"loss": 0.5235,
"step": 2780
},
{
"epoch": 0.46,
"learning_rate": 1.749439210408255e-05,
"loss": 0.5319,
"step": 2790
},
{
"epoch": 0.46,
"learning_rate": 1.7483176312247647e-05,
"loss": 0.522,
"step": 2800
},
{
"epoch": 0.46,
"learning_rate": 1.7471960520412742e-05,
"loss": 0.5346,
"step": 2810
},
{
"epoch": 0.46,
"learning_rate": 1.746074472857784e-05,
"loss": 0.5313,
"step": 2820
},
{
"epoch": 0.46,
"learning_rate": 1.7449528936742938e-05,
"loss": 0.5459,
"step": 2830
},
{
"epoch": 0.46,
"learning_rate": 1.7438313144908033e-05,
"loss": 0.5291,
"step": 2840
},
{
"epoch": 0.47,
"learning_rate": 1.7427097353073127e-05,
"loss": 0.5398,
"step": 2850
},
{
"epoch": 0.47,
"learning_rate": 1.7415881561238225e-05,
"loss": 0.5225,
"step": 2860
},
{
"epoch": 0.47,
"learning_rate": 1.740466576940332e-05,
"loss": 0.5232,
"step": 2870
},
{
"epoch": 0.47,
"learning_rate": 1.7393449977568418e-05,
"loss": 0.5315,
"step": 2880
},
{
"epoch": 0.47,
"learning_rate": 1.7382234185733516e-05,
"loss": 0.5323,
"step": 2890
},
{
"epoch": 0.47,
"learning_rate": 1.737101839389861e-05,
"loss": 0.5278,
"step": 2900
},
{
"epoch": 0.47,
"learning_rate": 1.7359802602063705e-05,
"loss": 0.5367,
"step": 2910
},
{
"epoch": 0.48,
"learning_rate": 1.7348586810228804e-05,
"loss": 0.5203,
"step": 2920
},
{
"epoch": 0.48,
"learning_rate": 1.73373710183939e-05,
"loss": 0.5267,
"step": 2930
},
{
"epoch": 0.48,
"learning_rate": 1.7326155226558996e-05,
"loss": 0.5389,
"step": 2940
},
{
"epoch": 0.48,
"learning_rate": 1.7314939434724094e-05,
"loss": 0.5327,
"step": 2950
},
{
"epoch": 0.48,
"learning_rate": 1.730372364288919e-05,
"loss": 0.5294,
"step": 2960
},
{
"epoch": 0.48,
"learning_rate": 1.7292507851054287e-05,
"loss": 0.5287,
"step": 2970
},
{
"epoch": 0.49,
"learning_rate": 1.728129205921938e-05,
"loss": 0.5358,
"step": 2980
},
{
"epoch": 0.49,
"learning_rate": 1.727007626738448e-05,
"loss": 0.5345,
"step": 2990
},
{
"epoch": 0.49,
"learning_rate": 1.7258860475549574e-05,
"loss": 0.5265,
"step": 3000
},
{
"epoch": 0.49,
"learning_rate": 1.7247644683714672e-05,
"loss": 0.525,
"step": 3010
},
{
"epoch": 0.49,
"learning_rate": 1.7236428891879767e-05,
"loss": 0.5299,
"step": 3020
},
{
"epoch": 0.49,
"learning_rate": 1.7225213100044865e-05,
"loss": 0.5295,
"step": 3030
},
{
"epoch": 0.5,
"learning_rate": 1.7213997308209963e-05,
"loss": 0.5317,
"step": 3040
},
{
"epoch": 0.5,
"learning_rate": 1.7202781516375058e-05,
"loss": 0.5275,
"step": 3050
},
{
"epoch": 0.5,
"learning_rate": 1.7191565724540153e-05,
"loss": 0.5304,
"step": 3060
},
{
"epoch": 0.5,
"learning_rate": 1.718034993270525e-05,
"loss": 0.5288,
"step": 3070
},
{
"epoch": 0.5,
"learning_rate": 1.716913414087035e-05,
"loss": 0.5197,
"step": 3080
},
{
"epoch": 0.5,
"learning_rate": 1.7157918349035443e-05,
"loss": 0.5305,
"step": 3090
},
{
"epoch": 0.51,
"learning_rate": 1.714670255720054e-05,
"loss": 0.5205,
"step": 3100
},
{
"epoch": 0.51,
"learning_rate": 1.7135486765365636e-05,
"loss": 0.5306,
"step": 3110
},
{
"epoch": 0.51,
"learning_rate": 1.712427097353073e-05,
"loss": 0.5228,
"step": 3120
},
{
"epoch": 0.51,
"learning_rate": 1.711305518169583e-05,
"loss": 0.5241,
"step": 3130
},
{
"epoch": 0.51,
"learning_rate": 1.7101839389860927e-05,
"loss": 0.5302,
"step": 3140
},
{
"epoch": 0.51,
"learning_rate": 1.709062359802602e-05,
"loss": 0.5254,
"step": 3150
},
{
"epoch": 0.52,
"learning_rate": 1.707940780619112e-05,
"loss": 0.529,
"step": 3160
},
{
"epoch": 0.52,
"learning_rate": 1.7068192014356214e-05,
"loss": 0.5224,
"step": 3170
},
{
"epoch": 0.52,
"learning_rate": 1.7056976222521312e-05,
"loss": 0.5328,
"step": 3180
},
{
"epoch": 0.52,
"learning_rate": 1.704576043068641e-05,
"loss": 0.5248,
"step": 3190
},
{
"epoch": 0.52,
"learning_rate": 1.7034544638851505e-05,
"loss": 0.5255,
"step": 3200
},
{
"epoch": 0.52,
"learning_rate": 1.70233288470166e-05,
"loss": 0.5227,
"step": 3210
},
{
"epoch": 0.53,
"learning_rate": 1.7012113055181698e-05,
"loss": 0.5266,
"step": 3220
},
{
"epoch": 0.53,
"learning_rate": 1.7000897263346792e-05,
"loss": 0.5202,
"step": 3230
},
{
"epoch": 0.53,
"learning_rate": 1.698968147151189e-05,
"loss": 0.5281,
"step": 3240
},
{
"epoch": 0.53,
"learning_rate": 1.697846567967699e-05,
"loss": 0.5326,
"step": 3250
},
{
"epoch": 0.53,
"learning_rate": 1.6967249887842083e-05,
"loss": 0.5226,
"step": 3260
},
{
"epoch": 0.53,
"learning_rate": 1.6956034096007178e-05,
"loss": 0.5169,
"step": 3270
},
{
"epoch": 0.54,
"learning_rate": 1.6944818304172276e-05,
"loss": 0.5308,
"step": 3280
},
{
"epoch": 0.54,
"learning_rate": 1.6933602512337374e-05,
"loss": 0.5189,
"step": 3290
},
{
"epoch": 0.54,
"learning_rate": 1.692238672050247e-05,
"loss": 0.5162,
"step": 3300
},
{
"epoch": 0.54,
"learning_rate": 1.6911170928667567e-05,
"loss": 0.5273,
"step": 3310
},
{
"epoch": 0.54,
"learning_rate": 1.689995513683266e-05,
"loss": 0.5184,
"step": 3320
},
{
"epoch": 0.54,
"learning_rate": 1.688873934499776e-05,
"loss": 0.5327,
"step": 3330
},
{
"epoch": 0.55,
"learning_rate": 1.6877523553162857e-05,
"loss": 0.5266,
"step": 3340
},
{
"epoch": 0.55,
"learning_rate": 1.6866307761327952e-05,
"loss": 0.5298,
"step": 3350
},
{
"epoch": 0.55,
"learning_rate": 1.6855091969493047e-05,
"loss": 0.529,
"step": 3360
},
{
"epoch": 0.55,
"learning_rate": 1.6843876177658145e-05,
"loss": 0.5149,
"step": 3370
},
{
"epoch": 0.55,
"learning_rate": 1.683266038582324e-05,
"loss": 0.5194,
"step": 3380
},
{
"epoch": 0.55,
"learning_rate": 1.6821444593988337e-05,
"loss": 0.5307,
"step": 3390
},
{
"epoch": 0.55,
"learning_rate": 1.6810228802153435e-05,
"loss": 0.5248,
"step": 3400
},
{
"epoch": 0.56,
"learning_rate": 1.679901301031853e-05,
"loss": 0.5214,
"step": 3410
},
{
"epoch": 0.56,
"learning_rate": 1.6787797218483625e-05,
"loss": 0.5306,
"step": 3420
},
{
"epoch": 0.56,
"learning_rate": 1.6776581426648723e-05,
"loss": 0.5154,
"step": 3430
},
{
"epoch": 0.56,
"learning_rate": 1.676536563481382e-05,
"loss": 0.5237,
"step": 3440
},
{
"epoch": 0.56,
"learning_rate": 1.6754149842978916e-05,
"loss": 0.5081,
"step": 3450
},
{
"epoch": 0.56,
"learning_rate": 1.6742934051144014e-05,
"loss": 0.5244,
"step": 3460
},
{
"epoch": 0.57,
"learning_rate": 1.6731718259309108e-05,
"loss": 0.5235,
"step": 3470
},
{
"epoch": 0.57,
"learning_rate": 1.6720502467474203e-05,
"loss": 0.5234,
"step": 3480
},
{
"epoch": 0.57,
"learning_rate": 1.67092866756393e-05,
"loss": 0.5189,
"step": 3490
},
{
"epoch": 0.57,
"learning_rate": 1.66980708838044e-05,
"loss": 0.5247,
"step": 3500
},
{
"epoch": 0.57,
"learning_rate": 1.6686855091969494e-05,
"loss": 0.5196,
"step": 3510
},
{
"epoch": 0.57,
"learning_rate": 1.6675639300134592e-05,
"loss": 0.5155,
"step": 3520
},
{
"epoch": 0.58,
"learning_rate": 1.6664423508299686e-05,
"loss": 0.5308,
"step": 3530
},
{
"epoch": 0.58,
"learning_rate": 1.6653207716464784e-05,
"loss": 0.5132,
"step": 3540
},
{
"epoch": 0.58,
"learning_rate": 1.6641991924629883e-05,
"loss": 0.5259,
"step": 3550
},
{
"epoch": 0.58,
"learning_rate": 1.6630776132794977e-05,
"loss": 0.5314,
"step": 3560
},
{
"epoch": 0.58,
"learning_rate": 1.6619560340960072e-05,
"loss": 0.5243,
"step": 3570
},
{
"epoch": 0.58,
"learning_rate": 1.660834454912517e-05,
"loss": 0.5169,
"step": 3580
},
{
"epoch": 0.59,
"learning_rate": 1.6597128757290265e-05,
"loss": 0.5337,
"step": 3590
},
{
"epoch": 0.59,
"learning_rate": 1.6585912965455363e-05,
"loss": 0.5289,
"step": 3600
},
{
"epoch": 0.59,
"learning_rate": 1.657469717362046e-05,
"loss": 0.5277,
"step": 3610
},
{
"epoch": 0.59,
"learning_rate": 1.6563481381785555e-05,
"loss": 0.5248,
"step": 3620
},
{
"epoch": 0.59,
"learning_rate": 1.655226558995065e-05,
"loss": 0.5183,
"step": 3630
},
{
"epoch": 0.59,
"learning_rate": 1.6541049798115748e-05,
"loss": 0.5115,
"step": 3640
},
{
"epoch": 0.6,
"learning_rate": 1.6529834006280846e-05,
"loss": 0.5304,
"step": 3650
},
{
"epoch": 0.6,
"learning_rate": 1.651861821444594e-05,
"loss": 0.521,
"step": 3660
},
{
"epoch": 0.6,
"learning_rate": 1.650740242261104e-05,
"loss": 0.5214,
"step": 3670
},
{
"epoch": 0.6,
"learning_rate": 1.6496186630776133e-05,
"loss": 0.5096,
"step": 3680
},
{
"epoch": 0.6,
"learning_rate": 1.648497083894123e-05,
"loss": 0.5259,
"step": 3690
},
{
"epoch": 0.6,
"learning_rate": 1.647375504710633e-05,
"loss": 0.522,
"step": 3700
},
{
"epoch": 0.61,
"learning_rate": 1.6462539255271424e-05,
"loss": 0.5248,
"step": 3710
},
{
"epoch": 0.61,
"learning_rate": 1.645132346343652e-05,
"loss": 0.5199,
"step": 3720
},
{
"epoch": 0.61,
"learning_rate": 1.6440107671601617e-05,
"loss": 0.5139,
"step": 3730
},
{
"epoch": 0.61,
"learning_rate": 1.642889187976671e-05,
"loss": 0.5187,
"step": 3740
},
{
"epoch": 0.61,
"learning_rate": 1.641767608793181e-05,
"loss": 0.5287,
"step": 3750
},
{
"epoch": 0.61,
"learning_rate": 1.6406460296096908e-05,
"loss": 0.5186,
"step": 3760
},
{
"epoch": 0.62,
"learning_rate": 1.6395244504262002e-05,
"loss": 0.5163,
"step": 3770
},
{
"epoch": 0.62,
"learning_rate": 1.6384028712427097e-05,
"loss": 0.5234,
"step": 3780
},
{
"epoch": 0.62,
"learning_rate": 1.6372812920592195e-05,
"loss": 0.5194,
"step": 3790
},
{
"epoch": 0.62,
"learning_rate": 1.6361597128757293e-05,
"loss": 0.5202,
"step": 3800
},
{
"epoch": 0.62,
"learning_rate": 1.6350381336922388e-05,
"loss": 0.5079,
"step": 3810
},
{
"epoch": 0.62,
"learning_rate": 1.6339165545087486e-05,
"loss": 0.5285,
"step": 3820
},
{
"epoch": 0.62,
"learning_rate": 1.632794975325258e-05,
"loss": 0.5293,
"step": 3830
},
{
"epoch": 0.63,
"learning_rate": 1.6316733961417675e-05,
"loss": 0.5281,
"step": 3840
},
{
"epoch": 0.63,
"learning_rate": 1.6305518169582773e-05,
"loss": 0.5256,
"step": 3850
},
{
"epoch": 0.63,
"learning_rate": 1.629430237774787e-05,
"loss": 0.5185,
"step": 3860
},
{
"epoch": 0.63,
"learning_rate": 1.6283086585912966e-05,
"loss": 0.5191,
"step": 3870
},
{
"epoch": 0.63,
"learning_rate": 1.6271870794078064e-05,
"loss": 0.5282,
"step": 3880
},
{
"epoch": 0.63,
"learning_rate": 1.626065500224316e-05,
"loss": 0.5249,
"step": 3890
},
{
"epoch": 0.64,
"learning_rate": 1.6249439210408257e-05,
"loss": 0.5072,
"step": 3900
},
{
"epoch": 0.64,
"learning_rate": 1.6238223418573355e-05,
"loss": 0.5154,
"step": 3910
},
{
"epoch": 0.64,
"learning_rate": 1.622700762673845e-05,
"loss": 0.5154,
"step": 3920
},
{
"epoch": 0.64,
"learning_rate": 1.6215791834903544e-05,
"loss": 0.5181,
"step": 3930
},
{
"epoch": 0.64,
"learning_rate": 1.6204576043068642e-05,
"loss": 0.516,
"step": 3940
},
{
"epoch": 0.64,
"learning_rate": 1.6193360251233737e-05,
"loss": 0.519,
"step": 3950
},
{
"epoch": 0.65,
"learning_rate": 1.6182144459398835e-05,
"loss": 0.5253,
"step": 3960
},
{
"epoch": 0.65,
"learning_rate": 1.6170928667563933e-05,
"loss": 0.5235,
"step": 3970
},
{
"epoch": 0.65,
"learning_rate": 1.6159712875729028e-05,
"loss": 0.5187,
"step": 3980
},
{
"epoch": 0.65,
"learning_rate": 1.6148497083894122e-05,
"loss": 0.5098,
"step": 3990
},
{
"epoch": 0.65,
"learning_rate": 1.613728129205922e-05,
"loss": 0.5143,
"step": 4000
},
{
"epoch": 0.65,
"learning_rate": 1.612606550022432e-05,
"loss": 0.5114,
"step": 4010
},
{
"epoch": 0.66,
"learning_rate": 1.6114849708389413e-05,
"loss": 0.5152,
"step": 4020
},
{
"epoch": 0.66,
"learning_rate": 1.610363391655451e-05,
"loss": 0.508,
"step": 4030
},
{
"epoch": 0.66,
"learning_rate": 1.6092418124719606e-05,
"loss": 0.5062,
"step": 4040
},
{
"epoch": 0.66,
"learning_rate": 1.6081202332884704e-05,
"loss": 0.5186,
"step": 4050
},
{
"epoch": 0.66,
"learning_rate": 1.6069986541049802e-05,
"loss": 0.5312,
"step": 4060
},
{
"epoch": 0.66,
"learning_rate": 1.6058770749214896e-05,
"loss": 0.5238,
"step": 4070
},
{
"epoch": 0.67,
"learning_rate": 1.604755495737999e-05,
"loss": 0.5026,
"step": 4080
},
{
"epoch": 0.67,
"learning_rate": 1.603633916554509e-05,
"loss": 0.5239,
"step": 4090
},
{
"epoch": 0.67,
"learning_rate": 1.6025123373710184e-05,
"loss": 0.5185,
"step": 4100
},
{
"epoch": 0.67,
"learning_rate": 1.6013907581875282e-05,
"loss": 0.5162,
"step": 4110
},
{
"epoch": 0.67,
"learning_rate": 1.600269179004038e-05,
"loss": 0.5212,
"step": 4120
},
{
"epoch": 0.67,
"learning_rate": 1.5991475998205475e-05,
"loss": 0.5128,
"step": 4130
},
{
"epoch": 0.68,
"learning_rate": 1.598026020637057e-05,
"loss": 0.5188,
"step": 4140
},
{
"epoch": 0.68,
"learning_rate": 1.5969044414535667e-05,
"loss": 0.506,
"step": 4150
},
{
"epoch": 0.68,
"learning_rate": 1.5957828622700765e-05,
"loss": 0.5128,
"step": 4160
},
{
"epoch": 0.68,
"learning_rate": 1.594661283086586e-05,
"loss": 0.5244,
"step": 4170
},
{
"epoch": 0.68,
"learning_rate": 1.5935397039030958e-05,
"loss": 0.5078,
"step": 4180
},
{
"epoch": 0.68,
"learning_rate": 1.5924181247196053e-05,
"loss": 0.5319,
"step": 4190
},
{
"epoch": 0.69,
"learning_rate": 1.5912965455361147e-05,
"loss": 0.5186,
"step": 4200
},
{
"epoch": 0.69,
"learning_rate": 1.5901749663526245e-05,
"loss": 0.5107,
"step": 4210
},
{
"epoch": 0.69,
"learning_rate": 1.5890533871691344e-05,
"loss": 0.5131,
"step": 4220
},
{
"epoch": 0.69,
"learning_rate": 1.5879318079856438e-05,
"loss": 0.5136,
"step": 4230
},
{
"epoch": 0.69,
"learning_rate": 1.5868102288021536e-05,
"loss": 0.5059,
"step": 4240
},
{
"epoch": 0.69,
"learning_rate": 1.585688649618663e-05,
"loss": 0.5064,
"step": 4250
},
{
"epoch": 0.7,
"learning_rate": 1.584567070435173e-05,
"loss": 0.5063,
"step": 4260
},
{
"epoch": 0.7,
"learning_rate": 1.5834454912516827e-05,
"loss": 0.5301,
"step": 4270
},
{
"epoch": 0.7,
"learning_rate": 1.582323912068192e-05,
"loss": 0.5228,
"step": 4280
},
{
"epoch": 0.7,
"learning_rate": 1.5812023328847016e-05,
"loss": 0.5207,
"step": 4290
},
{
"epoch": 0.7,
"learning_rate": 1.5800807537012114e-05,
"loss": 0.5064,
"step": 4300
},
{
"epoch": 0.7,
"learning_rate": 1.578959174517721e-05,
"loss": 0.5148,
"step": 4310
},
{
"epoch": 0.7,
"learning_rate": 1.5778375953342307e-05,
"loss": 0.5181,
"step": 4320
},
{
"epoch": 0.71,
"learning_rate": 1.5767160161507405e-05,
"loss": 0.5093,
"step": 4330
},
{
"epoch": 0.71,
"learning_rate": 1.57559443696725e-05,
"loss": 0.5074,
"step": 4340
},
{
"epoch": 0.71,
"learning_rate": 1.5744728577837594e-05,
"loss": 0.5226,
"step": 4350
},
{
"epoch": 0.71,
"learning_rate": 1.5733512786002693e-05,
"loss": 0.5122,
"step": 4360
},
{
"epoch": 0.71,
"learning_rate": 1.572229699416779e-05,
"loss": 0.5197,
"step": 4370
},
{
"epoch": 0.71,
"learning_rate": 1.5711081202332885e-05,
"loss": 0.5143,
"step": 4380
},
{
"epoch": 0.72,
"learning_rate": 1.5699865410497983e-05,
"loss": 0.5138,
"step": 4390
},
{
"epoch": 0.72,
"learning_rate": 1.5688649618663078e-05,
"loss": 0.5326,
"step": 4400
},
{
"epoch": 0.72,
"learning_rate": 1.5677433826828176e-05,
"loss": 0.5139,
"step": 4410
},
{
"epoch": 0.72,
"learning_rate": 1.5666218034993274e-05,
"loss": 0.5208,
"step": 4420
},
{
"epoch": 0.72,
"learning_rate": 1.565500224315837e-05,
"loss": 0.5114,
"step": 4430
},
{
"epoch": 0.72,
"learning_rate": 1.5643786451323463e-05,
"loss": 0.5302,
"step": 4440
},
{
"epoch": 0.73,
"learning_rate": 1.563257065948856e-05,
"loss": 0.5207,
"step": 4450
},
{
"epoch": 0.73,
"learning_rate": 1.5621354867653656e-05,
"loss": 0.4986,
"step": 4460
},
{
"epoch": 0.73,
"learning_rate": 1.5610139075818754e-05,
"loss": 0.5175,
"step": 4470
},
{
"epoch": 0.73,
"learning_rate": 1.5598923283983852e-05,
"loss": 0.5168,
"step": 4480
},
{
"epoch": 0.73,
"learning_rate": 1.5587707492148947e-05,
"loss": 0.5169,
"step": 4490
},
{
"epoch": 0.73,
"learning_rate": 1.557649170031404e-05,
"loss": 0.5121,
"step": 4500
},
{
"epoch": 0.74,
"learning_rate": 1.556527590847914e-05,
"loss": 0.5094,
"step": 4510
},
{
"epoch": 0.74,
"learning_rate": 1.5554060116644238e-05,
"loss": 0.5169,
"step": 4520
},
{
"epoch": 0.74,
"learning_rate": 1.5542844324809332e-05,
"loss": 0.5004,
"step": 4530
},
{
"epoch": 0.74,
"learning_rate": 1.553162853297443e-05,
"loss": 0.5154,
"step": 4540
},
{
"epoch": 0.74,
"learning_rate": 1.5520412741139525e-05,
"loss": 0.5143,
"step": 4550
},
{
"epoch": 0.74,
"learning_rate": 1.550919694930462e-05,
"loss": 0.5234,
"step": 4560
},
{
"epoch": 0.75,
"learning_rate": 1.5497981157469718e-05,
"loss": 0.5072,
"step": 4570
},
{
"epoch": 0.75,
"learning_rate": 1.5486765365634816e-05,
"loss": 0.5196,
"step": 4580
},
{
"epoch": 0.75,
"learning_rate": 1.547554957379991e-05,
"loss": 0.5093,
"step": 4590
},
{
"epoch": 0.75,
"learning_rate": 1.546433378196501e-05,
"loss": 0.508,
"step": 4600
},
{
"epoch": 0.75,
"learning_rate": 1.5453117990130103e-05,
"loss": 0.5118,
"step": 4610
},
{
"epoch": 0.75,
"learning_rate": 1.54419021982952e-05,
"loss": 0.5169,
"step": 4620
},
{
"epoch": 0.76,
"learning_rate": 1.54306864064603e-05,
"loss": 0.5196,
"step": 4630
},
{
"epoch": 0.76,
"learning_rate": 1.5419470614625394e-05,
"loss": 0.5169,
"step": 4640
},
{
"epoch": 0.76,
"learning_rate": 1.540825482279049e-05,
"loss": 0.5043,
"step": 4650
},
{
"epoch": 0.76,
"learning_rate": 1.5397039030955587e-05,
"loss": 0.5099,
"step": 4660
},
{
"epoch": 0.76,
"learning_rate": 1.5385823239120685e-05,
"loss": 0.5285,
"step": 4670
},
{
"epoch": 0.76,
"learning_rate": 1.537460744728578e-05,
"loss": 0.5192,
"step": 4680
},
{
"epoch": 0.77,
"learning_rate": 1.5363391655450877e-05,
"loss": 0.521,
"step": 4690
},
{
"epoch": 0.77,
"learning_rate": 1.5352175863615972e-05,
"loss": 0.5209,
"step": 4700
},
{
"epoch": 0.77,
"learning_rate": 1.5340960071781067e-05,
"loss": 0.5177,
"step": 4710
},
{
"epoch": 0.77,
"learning_rate": 1.5329744279946165e-05,
"loss": 0.504,
"step": 4720
},
{
"epoch": 0.77,
"learning_rate": 1.5318528488111263e-05,
"loss": 0.5031,
"step": 4730
},
{
"epoch": 0.77,
"learning_rate": 1.5307312696276357e-05,
"loss": 0.5065,
"step": 4740
},
{
"epoch": 0.78,
"learning_rate": 1.5296096904441456e-05,
"loss": 0.5102,
"step": 4750
},
{
"epoch": 0.78,
"learning_rate": 1.528488111260655e-05,
"loss": 0.5086,
"step": 4760
},
{
"epoch": 0.78,
"learning_rate": 1.5273665320771648e-05,
"loss": 0.5045,
"step": 4770
},
{
"epoch": 0.78,
"learning_rate": 1.5262449528936746e-05,
"loss": 0.503,
"step": 4780
},
{
"epoch": 0.78,
"learning_rate": 1.5251233737101841e-05,
"loss": 0.5178,
"step": 4790
},
{
"epoch": 0.78,
"learning_rate": 1.5240017945266936e-05,
"loss": 0.5174,
"step": 4800
},
{
"epoch": 0.78,
"learning_rate": 1.5228802153432034e-05,
"loss": 0.5183,
"step": 4810
},
{
"epoch": 0.79,
"learning_rate": 1.521758636159713e-05,
"loss": 0.5003,
"step": 4820
},
{
"epoch": 0.79,
"learning_rate": 1.5206370569762225e-05,
"loss": 0.5163,
"step": 4830
},
{
"epoch": 0.79,
"learning_rate": 1.5195154777927323e-05,
"loss": 0.5069,
"step": 4840
},
{
"epoch": 0.79,
"learning_rate": 1.5183938986092419e-05,
"loss": 0.5132,
"step": 4850
},
{
"epoch": 0.79,
"learning_rate": 1.5172723194257515e-05,
"loss": 0.5199,
"step": 4860
},
{
"epoch": 0.79,
"learning_rate": 1.5161507402422614e-05,
"loss": 0.5211,
"step": 4870
},
{
"epoch": 0.8,
"learning_rate": 1.5150291610587708e-05,
"loss": 0.5,
"step": 4880
},
{
"epoch": 0.8,
"learning_rate": 1.5139075818752805e-05,
"loss": 0.5073,
"step": 4890
},
{
"epoch": 0.8,
"learning_rate": 1.5127860026917903e-05,
"loss": 0.5107,
"step": 4900
},
{
"epoch": 0.8,
"learning_rate": 1.5116644235082997e-05,
"loss": 0.5222,
"step": 4910
},
{
"epoch": 0.8,
"learning_rate": 1.5105428443248094e-05,
"loss": 0.5136,
"step": 4920
},
{
"epoch": 0.8,
"learning_rate": 1.5094212651413192e-05,
"loss": 0.5058,
"step": 4930
},
{
"epoch": 0.81,
"learning_rate": 1.5082996859578288e-05,
"loss": 0.514,
"step": 4940
},
{
"epoch": 0.81,
"learning_rate": 1.5071781067743383e-05,
"loss": 0.5073,
"step": 4950
},
{
"epoch": 0.81,
"learning_rate": 1.506056527590848e-05,
"loss": 0.5185,
"step": 4960
},
{
"epoch": 0.81,
"learning_rate": 1.5049349484073577e-05,
"loss": 0.5202,
"step": 4970
},
{
"epoch": 0.81,
"learning_rate": 1.5038133692238672e-05,
"loss": 0.5137,
"step": 4980
},
{
"epoch": 0.81,
"learning_rate": 1.502691790040377e-05,
"loss": 0.5035,
"step": 4990
},
{
"epoch": 0.82,
"learning_rate": 1.5015702108568866e-05,
"loss": 0.5166,
"step": 5000
},
{
"epoch": 0.82,
"learning_rate": 1.500448631673396e-05,
"loss": 0.5113,
"step": 5010
},
{
"epoch": 0.82,
"learning_rate": 1.4993270524899059e-05,
"loss": 0.5061,
"step": 5020
},
{
"epoch": 0.82,
"learning_rate": 1.4982054733064155e-05,
"loss": 0.4983,
"step": 5030
},
{
"epoch": 0.82,
"learning_rate": 1.4970838941229252e-05,
"loss": 0.5184,
"step": 5040
},
{
"epoch": 0.82,
"learning_rate": 1.495962314939435e-05,
"loss": 0.5146,
"step": 5050
},
{
"epoch": 0.83,
"learning_rate": 1.4948407357559444e-05,
"loss": 0.5032,
"step": 5060
},
{
"epoch": 0.83,
"learning_rate": 1.493719156572454e-05,
"loss": 0.5022,
"step": 5070
},
{
"epoch": 0.83,
"learning_rate": 1.4925975773889639e-05,
"loss": 0.518,
"step": 5080
},
{
"epoch": 0.83,
"learning_rate": 1.4914759982054733e-05,
"loss": 0.5044,
"step": 5090
},
{
"epoch": 0.83,
"learning_rate": 1.490354419021983e-05,
"loss": 0.5065,
"step": 5100
},
{
"epoch": 0.83,
"learning_rate": 1.4892328398384928e-05,
"loss": 0.506,
"step": 5110
},
{
"epoch": 0.84,
"learning_rate": 1.4881112606550024e-05,
"loss": 0.5093,
"step": 5120
},
{
"epoch": 0.84,
"learning_rate": 1.4869896814715119e-05,
"loss": 0.5114,
"step": 5130
},
{
"epoch": 0.84,
"learning_rate": 1.4858681022880217e-05,
"loss": 0.5172,
"step": 5140
},
{
"epoch": 0.84,
"learning_rate": 1.4847465231045313e-05,
"loss": 0.5236,
"step": 5150
},
{
"epoch": 0.84,
"learning_rate": 1.4836249439210408e-05,
"loss": 0.5068,
"step": 5160
},
{
"epoch": 0.84,
"learning_rate": 1.4825033647375506e-05,
"loss": 0.5078,
"step": 5170
},
{
"epoch": 0.85,
"learning_rate": 1.4813817855540602e-05,
"loss": 0.5083,
"step": 5180
},
{
"epoch": 0.85,
"learning_rate": 1.4802602063705697e-05,
"loss": 0.51,
"step": 5190
},
{
"epoch": 0.85,
"learning_rate": 1.4791386271870795e-05,
"loss": 0.5009,
"step": 5200
},
{
"epoch": 0.85,
"learning_rate": 1.4780170480035891e-05,
"loss": 0.5067,
"step": 5210
},
{
"epoch": 0.85,
"learning_rate": 1.4768954688200988e-05,
"loss": 0.5092,
"step": 5220
},
{
"epoch": 0.85,
"learning_rate": 1.4757738896366086e-05,
"loss": 0.4984,
"step": 5230
},
{
"epoch": 0.86,
"learning_rate": 1.474652310453118e-05,
"loss": 0.5114,
"step": 5240
},
{
"epoch": 0.86,
"learning_rate": 1.4735307312696277e-05,
"loss": 0.5106,
"step": 5250
},
{
"epoch": 0.86,
"learning_rate": 1.4724091520861375e-05,
"loss": 0.5154,
"step": 5260
},
{
"epoch": 0.86,
"learning_rate": 1.471287572902647e-05,
"loss": 0.5065,
"step": 5270
},
{
"epoch": 0.86,
"learning_rate": 1.4701659937191568e-05,
"loss": 0.5049,
"step": 5280
},
{
"epoch": 0.86,
"learning_rate": 1.4690444145356664e-05,
"loss": 0.5126,
"step": 5290
},
{
"epoch": 0.86,
"learning_rate": 1.467922835352176e-05,
"loss": 0.5086,
"step": 5300
},
{
"epoch": 0.87,
"learning_rate": 1.4668012561686857e-05,
"loss": 0.5081,
"step": 5310
},
{
"epoch": 0.87,
"learning_rate": 1.4656796769851953e-05,
"loss": 0.5125,
"step": 5320
},
{
"epoch": 0.87,
"learning_rate": 1.464558097801705e-05,
"loss": 0.5097,
"step": 5330
},
{
"epoch": 0.87,
"learning_rate": 1.4634365186182147e-05,
"loss": 0.498,
"step": 5340
},
{
"epoch": 0.87,
"learning_rate": 1.4623149394347242e-05,
"loss": 0.5011,
"step": 5350
},
{
"epoch": 0.87,
"learning_rate": 1.4611933602512338e-05,
"loss": 0.519,
"step": 5360
},
{
"epoch": 0.88,
"learning_rate": 1.4600717810677436e-05,
"loss": 0.4988,
"step": 5370
},
{
"epoch": 0.88,
"learning_rate": 1.4589502018842531e-05,
"loss": 0.5005,
"step": 5380
},
{
"epoch": 0.88,
"learning_rate": 1.4578286227007627e-05,
"loss": 0.5051,
"step": 5390
},
{
"epoch": 0.88,
"learning_rate": 1.4567070435172726e-05,
"loss": 0.495,
"step": 5400
},
{
"epoch": 0.88,
"learning_rate": 1.4555854643337822e-05,
"loss": 0.5107,
"step": 5410
},
{
"epoch": 0.88,
"learning_rate": 1.4544638851502917e-05,
"loss": 0.5104,
"step": 5420
},
{
"epoch": 0.89,
"learning_rate": 1.4533423059668015e-05,
"loss": 0.5104,
"step": 5430
},
{
"epoch": 0.89,
"learning_rate": 1.4522207267833111e-05,
"loss": 0.5118,
"step": 5440
},
{
"epoch": 0.89,
"learning_rate": 1.4510991475998206e-05,
"loss": 0.5085,
"step": 5450
},
{
"epoch": 0.89,
"learning_rate": 1.4499775684163304e-05,
"loss": 0.4994,
"step": 5460
},
{
"epoch": 0.89,
"learning_rate": 1.44885598923284e-05,
"loss": 0.4985,
"step": 5470
},
{
"epoch": 0.89,
"learning_rate": 1.4477344100493496e-05,
"loss": 0.4968,
"step": 5480
},
{
"epoch": 0.9,
"learning_rate": 1.4466128308658593e-05,
"loss": 0.5028,
"step": 5490
},
{
"epoch": 0.9,
"learning_rate": 1.4454912516823689e-05,
"loss": 0.5092,
"step": 5500
},
{
"epoch": 0.9,
"learning_rate": 1.4443696724988785e-05,
"loss": 0.5083,
"step": 5510
},
{
"epoch": 0.9,
"learning_rate": 1.4432480933153884e-05,
"loss": 0.4946,
"step": 5520
},
{
"epoch": 0.9,
"learning_rate": 1.4421265141318978e-05,
"loss": 0.5098,
"step": 5530
},
{
"epoch": 0.9,
"learning_rate": 1.4410049349484075e-05,
"loss": 0.5119,
"step": 5540
},
{
"epoch": 0.91,
"learning_rate": 1.4398833557649173e-05,
"loss": 0.499,
"step": 5550
},
{
"epoch": 0.91,
"learning_rate": 1.4387617765814267e-05,
"loss": 0.5077,
"step": 5560
},
{
"epoch": 0.91,
"learning_rate": 1.4376401973979364e-05,
"loss": 0.5167,
"step": 5570
},
{
"epoch": 0.91,
"learning_rate": 1.4365186182144462e-05,
"loss": 0.5103,
"step": 5580
},
{
"epoch": 0.91,
"learning_rate": 1.4353970390309558e-05,
"loss": 0.5073,
"step": 5590
},
{
"epoch": 0.91,
"learning_rate": 1.4342754598474653e-05,
"loss": 0.5183,
"step": 5600
},
{
"epoch": 0.92,
"learning_rate": 1.433153880663975e-05,
"loss": 0.4945,
"step": 5610
},
{
"epoch": 0.92,
"learning_rate": 1.4320323014804847e-05,
"loss": 0.5049,
"step": 5620
},
{
"epoch": 0.92,
"learning_rate": 1.4309107222969942e-05,
"loss": 0.4994,
"step": 5630
},
{
"epoch": 0.92,
"learning_rate": 1.429789143113504e-05,
"loss": 0.4904,
"step": 5640
},
{
"epoch": 0.92,
"learning_rate": 1.4286675639300136e-05,
"loss": 0.5215,
"step": 5650
},
{
"epoch": 0.92,
"learning_rate": 1.4275459847465233e-05,
"loss": 0.5026,
"step": 5660
},
{
"epoch": 0.93,
"learning_rate": 1.4264244055630329e-05,
"loss": 0.5094,
"step": 5670
},
{
"epoch": 0.93,
"learning_rate": 1.4253028263795425e-05,
"loss": 0.5058,
"step": 5680
},
{
"epoch": 0.93,
"learning_rate": 1.4241812471960522e-05,
"loss": 0.5149,
"step": 5690
},
{
"epoch": 0.93,
"learning_rate": 1.423059668012562e-05,
"loss": 0.5156,
"step": 5700
},
{
"epoch": 0.93,
"learning_rate": 1.4219380888290714e-05,
"loss": 0.5042,
"step": 5710
},
{
"epoch": 0.93,
"learning_rate": 1.420816509645581e-05,
"loss": 0.4958,
"step": 5720
},
{
"epoch": 0.94,
"learning_rate": 1.4196949304620909e-05,
"loss": 0.5071,
"step": 5730
},
{
"epoch": 0.94,
"learning_rate": 1.4185733512786003e-05,
"loss": 0.4999,
"step": 5740
},
{
"epoch": 0.94,
"learning_rate": 1.41745177209511e-05,
"loss": 0.5089,
"step": 5750
},
{
"epoch": 0.94,
"learning_rate": 1.4163301929116198e-05,
"loss": 0.4997,
"step": 5760
},
{
"epoch": 0.94,
"learning_rate": 1.4152086137281294e-05,
"loss": 0.5034,
"step": 5770
},
{
"epoch": 0.94,
"learning_rate": 1.4140870345446389e-05,
"loss": 0.4994,
"step": 5780
},
{
"epoch": 0.94,
"learning_rate": 1.4129654553611487e-05,
"loss": 0.5106,
"step": 5790
},
{
"epoch": 0.95,
"learning_rate": 1.4118438761776583e-05,
"loss": 0.5032,
"step": 5800
},
{
"epoch": 0.95,
"learning_rate": 1.4107222969941678e-05,
"loss": 0.5118,
"step": 5810
},
{
"epoch": 0.95,
"learning_rate": 1.4096007178106776e-05,
"loss": 0.5173,
"step": 5820
},
{
"epoch": 0.95,
"learning_rate": 1.4084791386271872e-05,
"loss": 0.5107,
"step": 5830
},
{
"epoch": 0.95,
"learning_rate": 1.4073575594436969e-05,
"loss": 0.5117,
"step": 5840
},
{
"epoch": 0.95,
"learning_rate": 1.4062359802602065e-05,
"loss": 0.5038,
"step": 5850
},
{
"epoch": 0.96,
"learning_rate": 1.4051144010767161e-05,
"loss": 0.5058,
"step": 5860
},
{
"epoch": 0.96,
"learning_rate": 1.4039928218932258e-05,
"loss": 0.5131,
"step": 5870
},
{
"epoch": 0.96,
"learning_rate": 1.4028712427097356e-05,
"loss": 0.492,
"step": 5880
},
{
"epoch": 0.96,
"learning_rate": 1.401749663526245e-05,
"loss": 0.5011,
"step": 5890
},
{
"epoch": 0.96,
"learning_rate": 1.4006280843427547e-05,
"loss": 0.5045,
"step": 5900
},
{
"epoch": 0.96,
"learning_rate": 1.3995065051592645e-05,
"loss": 0.5069,
"step": 5910
},
{
"epoch": 0.97,
"learning_rate": 1.398384925975774e-05,
"loss": 0.4989,
"step": 5920
},
{
"epoch": 0.97,
"learning_rate": 1.3972633467922836e-05,
"loss": 0.4968,
"step": 5930
},
{
"epoch": 0.97,
"learning_rate": 1.3961417676087934e-05,
"loss": 0.4994,
"step": 5940
},
{
"epoch": 0.97,
"learning_rate": 1.395020188425303e-05,
"loss": 0.4964,
"step": 5950
},
{
"epoch": 0.97,
"learning_rate": 1.3938986092418125e-05,
"loss": 0.5107,
"step": 5960
},
{
"epoch": 0.97,
"learning_rate": 1.3927770300583223e-05,
"loss": 0.503,
"step": 5970
},
{
"epoch": 0.98,
"learning_rate": 1.391655450874832e-05,
"loss": 0.5122,
"step": 5980
},
{
"epoch": 0.98,
"learning_rate": 1.3905338716913414e-05,
"loss": 0.4971,
"step": 5990
},
{
"epoch": 0.98,
"learning_rate": 1.3894122925078512e-05,
"loss": 0.5108,
"step": 6000
},
{
"epoch": 0.98,
"learning_rate": 1.3882907133243608e-05,
"loss": 0.4972,
"step": 6010
},
{
"epoch": 0.98,
"learning_rate": 1.3871691341408705e-05,
"loss": 0.5065,
"step": 6020
},
{
"epoch": 0.98,
"learning_rate": 1.3860475549573801e-05,
"loss": 0.5199,
"step": 6030
},
{
"epoch": 0.99,
"learning_rate": 1.3849259757738897e-05,
"loss": 0.498,
"step": 6040
},
{
"epoch": 0.99,
"learning_rate": 1.3838043965903994e-05,
"loss": 0.5128,
"step": 6050
},
{
"epoch": 0.99,
"learning_rate": 1.3826828174069092e-05,
"loss": 0.4961,
"step": 6060
},
{
"epoch": 0.99,
"learning_rate": 1.3815612382234187e-05,
"loss": 0.4963,
"step": 6070
},
{
"epoch": 0.99,
"learning_rate": 1.3804396590399283e-05,
"loss": 0.5079,
"step": 6080
},
{
"epoch": 0.99,
"learning_rate": 1.3793180798564381e-05,
"loss": 0.5068,
"step": 6090
},
{
"epoch": 1.0,
"learning_rate": 1.3781965006729476e-05,
"loss": 0.4963,
"step": 6100
},
{
"epoch": 1.0,
"learning_rate": 1.3770749214894572e-05,
"loss": 0.5092,
"step": 6110
},
{
"epoch": 1.0,
"learning_rate": 1.375953342305967e-05,
"loss": 0.5008,
"step": 6120
},
{
"epoch": 1.0,
"learning_rate": 1.3748317631224766e-05,
"loss": 0.5106,
"step": 6130
},
{
"epoch": 1.0,
"learning_rate": 1.3737101839389861e-05,
"loss": 0.4955,
"step": 6140
},
{
"epoch": 1.0,
"learning_rate": 1.3725886047554959e-05,
"loss": 0.4905,
"step": 6150
},
{
"epoch": 1.01,
"learning_rate": 1.3714670255720055e-05,
"loss": 0.4966,
"step": 6160
},
{
"epoch": 1.01,
"learning_rate": 1.370345446388515e-05,
"loss": 0.4885,
"step": 6170
},
{
"epoch": 1.01,
"learning_rate": 1.3692238672050248e-05,
"loss": 0.5109,
"step": 6180
},
{
"epoch": 1.01,
"learning_rate": 1.3681022880215345e-05,
"loss": 0.5029,
"step": 6190
},
{
"epoch": 1.01,
"learning_rate": 1.3669807088380441e-05,
"loss": 0.4944,
"step": 6200
},
{
"epoch": 1.01,
"learning_rate": 1.3658591296545537e-05,
"loss": 0.494,
"step": 6210
},
{
"epoch": 1.02,
"learning_rate": 1.3647375504710634e-05,
"loss": 0.5075,
"step": 6220
},
{
"epoch": 1.02,
"learning_rate": 1.363615971287573e-05,
"loss": 0.5079,
"step": 6230
},
{
"epoch": 1.02,
"learning_rate": 1.3624943921040828e-05,
"loss": 0.495,
"step": 6240
},
{
"epoch": 1.02,
"learning_rate": 1.3613728129205923e-05,
"loss": 0.506,
"step": 6250
},
{
"epoch": 1.02,
"learning_rate": 1.3602512337371019e-05,
"loss": 0.4918,
"step": 6260
},
{
"epoch": 1.02,
"learning_rate": 1.3591296545536117e-05,
"loss": 0.4981,
"step": 6270
},
{
"epoch": 1.02,
"learning_rate": 1.3580080753701212e-05,
"loss": 0.4978,
"step": 6280
},
{
"epoch": 1.03,
"learning_rate": 1.3568864961866308e-05,
"loss": 0.4942,
"step": 6290
},
{
"epoch": 1.03,
"learning_rate": 1.3557649170031406e-05,
"loss": 0.5062,
"step": 6300
},
{
"epoch": 1.03,
"learning_rate": 1.3546433378196503e-05,
"loss": 0.5008,
"step": 6310
},
{
"epoch": 1.03,
"learning_rate": 1.3535217586361597e-05,
"loss": 0.5065,
"step": 6320
},
{
"epoch": 1.03,
"learning_rate": 1.3524001794526695e-05,
"loss": 0.5129,
"step": 6330
},
{
"epoch": 1.03,
"learning_rate": 1.3512786002691792e-05,
"loss": 0.5002,
"step": 6340
},
{
"epoch": 1.04,
"learning_rate": 1.3501570210856886e-05,
"loss": 0.5028,
"step": 6350
},
{
"epoch": 1.04,
"learning_rate": 1.3490354419021984e-05,
"loss": 0.4931,
"step": 6360
},
{
"epoch": 1.04,
"learning_rate": 1.347913862718708e-05,
"loss": 0.4971,
"step": 6370
},
{
"epoch": 1.04,
"learning_rate": 1.3467922835352177e-05,
"loss": 0.4965,
"step": 6380
},
{
"epoch": 1.04,
"learning_rate": 1.3456707043517273e-05,
"loss": 0.5136,
"step": 6390
},
{
"epoch": 1.04,
"learning_rate": 1.344549125168237e-05,
"loss": 0.4982,
"step": 6400
},
{
"epoch": 1.05,
"learning_rate": 1.3434275459847466e-05,
"loss": 0.4885,
"step": 6410
},
{
"epoch": 1.05,
"learning_rate": 1.3423059668012564e-05,
"loss": 0.4949,
"step": 6420
},
{
"epoch": 1.05,
"learning_rate": 1.3411843876177659e-05,
"loss": 0.5015,
"step": 6430
},
{
"epoch": 1.05,
"learning_rate": 1.3400628084342755e-05,
"loss": 0.4977,
"step": 6440
},
{
"epoch": 1.05,
"learning_rate": 1.3389412292507853e-05,
"loss": 0.5005,
"step": 6450
},
{
"epoch": 1.05,
"learning_rate": 1.3378196500672948e-05,
"loss": 0.4907,
"step": 6460
},
{
"epoch": 1.06,
"learning_rate": 1.3366980708838044e-05,
"loss": 0.509,
"step": 6470
},
{
"epoch": 1.06,
"learning_rate": 1.3355764917003142e-05,
"loss": 0.4904,
"step": 6480
},
{
"epoch": 1.06,
"learning_rate": 1.3344549125168239e-05,
"loss": 0.488,
"step": 6490
},
{
"epoch": 1.06,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.4863,
"step": 6500
},
{
"epoch": 1.06,
"learning_rate": 1.3322117541498431e-05,
"loss": 0.5017,
"step": 6510
},
{
"epoch": 1.06,
"learning_rate": 1.3310901749663528e-05,
"loss": 0.5029,
"step": 6520
},
{
"epoch": 1.07,
"learning_rate": 1.3299685957828622e-05,
"loss": 0.507,
"step": 6530
},
{
"epoch": 1.07,
"learning_rate": 1.328847016599372e-05,
"loss": 0.4966,
"step": 6540
},
{
"epoch": 1.07,
"learning_rate": 1.3277254374158817e-05,
"loss": 0.5035,
"step": 6550
},
{
"epoch": 1.07,
"learning_rate": 1.3266038582323913e-05,
"loss": 0.4877,
"step": 6560
},
{
"epoch": 1.07,
"learning_rate": 1.325482279048901e-05,
"loss": 0.4896,
"step": 6570
},
{
"epoch": 1.07,
"learning_rate": 1.3243606998654106e-05,
"loss": 0.5011,
"step": 6580
},
{
"epoch": 1.08,
"learning_rate": 1.3232391206819202e-05,
"loss": 0.5059,
"step": 6590
},
{
"epoch": 1.08,
"learning_rate": 1.32211754149843e-05,
"loss": 0.5017,
"step": 6600
},
{
"epoch": 1.08,
"learning_rate": 1.3209959623149395e-05,
"loss": 0.4982,
"step": 6610
},
{
"epoch": 1.08,
"learning_rate": 1.3198743831314491e-05,
"loss": 0.5009,
"step": 6620
},
{
"epoch": 1.08,
"learning_rate": 1.318752803947959e-05,
"loss": 0.4909,
"step": 6630
},
{
"epoch": 1.08,
"learning_rate": 1.3176312247644684e-05,
"loss": 0.5027,
"step": 6640
},
{
"epoch": 1.09,
"learning_rate": 1.316509645580978e-05,
"loss": 0.4997,
"step": 6650
},
{
"epoch": 1.09,
"learning_rate": 1.3153880663974878e-05,
"loss": 0.5015,
"step": 6660
},
{
"epoch": 1.09,
"learning_rate": 1.3142664872139975e-05,
"loss": 0.4974,
"step": 6670
},
{
"epoch": 1.09,
"learning_rate": 1.313144908030507e-05,
"loss": 0.4913,
"step": 6680
},
{
"epoch": 1.09,
"learning_rate": 1.3120233288470167e-05,
"loss": 0.4968,
"step": 6690
},
{
"epoch": 1.09,
"learning_rate": 1.3109017496635264e-05,
"loss": 0.4932,
"step": 6700
},
{
"epoch": 1.09,
"learning_rate": 1.3097801704800358e-05,
"loss": 0.5002,
"step": 6710
},
{
"epoch": 1.1,
"learning_rate": 1.3086585912965457e-05,
"loss": 0.5007,
"step": 6720
},
{
"epoch": 1.1,
"learning_rate": 1.3075370121130553e-05,
"loss": 0.5003,
"step": 6730
},
{
"epoch": 1.1,
"learning_rate": 1.306415432929565e-05,
"loss": 0.4896,
"step": 6740
},
{
"epoch": 1.1,
"learning_rate": 1.3052938537460747e-05,
"loss": 0.5006,
"step": 6750
},
{
"epoch": 1.1,
"learning_rate": 1.3041722745625842e-05,
"loss": 0.4972,
"step": 6760
},
{
"epoch": 1.1,
"learning_rate": 1.3030506953790938e-05,
"loss": 0.4971,
"step": 6770
},
{
"epoch": 1.11,
"learning_rate": 1.3019291161956036e-05,
"loss": 0.4925,
"step": 6780
},
{
"epoch": 1.11,
"learning_rate": 1.3008075370121131e-05,
"loss": 0.5005,
"step": 6790
},
{
"epoch": 1.11,
"learning_rate": 1.2996859578286227e-05,
"loss": 0.4981,
"step": 6800
},
{
"epoch": 1.11,
"learning_rate": 1.2985643786451325e-05,
"loss": 0.5063,
"step": 6810
},
{
"epoch": 1.11,
"learning_rate": 1.297442799461642e-05,
"loss": 0.4991,
"step": 6820
},
{
"epoch": 1.11,
"learning_rate": 1.2963212202781516e-05,
"loss": 0.4969,
"step": 6830
},
{
"epoch": 1.12,
"learning_rate": 1.2951996410946615e-05,
"loss": 0.491,
"step": 6840
},
{
"epoch": 1.12,
"learning_rate": 1.2940780619111711e-05,
"loss": 0.5001,
"step": 6850
},
{
"epoch": 1.12,
"learning_rate": 1.2929564827276806e-05,
"loss": 0.501,
"step": 6860
},
{
"epoch": 1.12,
"learning_rate": 1.2918349035441904e-05,
"loss": 0.5133,
"step": 6870
},
{
"epoch": 1.12,
"learning_rate": 1.2907133243607e-05,
"loss": 0.4921,
"step": 6880
},
{
"epoch": 1.12,
"learning_rate": 1.2895917451772095e-05,
"loss": 0.5047,
"step": 6890
},
{
"epoch": 1.13,
"learning_rate": 1.2884701659937193e-05,
"loss": 0.4917,
"step": 6900
},
{
"epoch": 1.13,
"learning_rate": 1.2873485868102289e-05,
"loss": 0.5017,
"step": 6910
},
{
"epoch": 1.13,
"learning_rate": 1.2862270076267385e-05,
"loss": 0.5029,
"step": 6920
},
{
"epoch": 1.13,
"learning_rate": 1.2851054284432483e-05,
"loss": 0.4866,
"step": 6930
},
{
"epoch": 1.13,
"learning_rate": 1.2839838492597578e-05,
"loss": 0.4978,
"step": 6940
},
{
"epoch": 1.13,
"learning_rate": 1.2828622700762674e-05,
"loss": 0.4744,
"step": 6950
},
{
"epoch": 1.14,
"learning_rate": 1.2817406908927773e-05,
"loss": 0.486,
"step": 6960
},
{
"epoch": 1.14,
"learning_rate": 1.2806191117092867e-05,
"loss": 0.4956,
"step": 6970
},
{
"epoch": 1.14,
"learning_rate": 1.2794975325257964e-05,
"loss": 0.4935,
"step": 6980
},
{
"epoch": 1.14,
"learning_rate": 1.2783759533423062e-05,
"loss": 0.5023,
"step": 6990
},
{
"epoch": 1.14,
"learning_rate": 1.2772543741588156e-05,
"loss": 0.4937,
"step": 7000
},
{
"epoch": 1.14,
"learning_rate": 1.2761327949753253e-05,
"loss": 0.4873,
"step": 7010
},
{
"epoch": 1.15,
"learning_rate": 1.275011215791835e-05,
"loss": 0.5001,
"step": 7020
},
{
"epoch": 1.15,
"learning_rate": 1.2738896366083447e-05,
"loss": 0.4909,
"step": 7030
},
{
"epoch": 1.15,
"learning_rate": 1.2727680574248542e-05,
"loss": 0.49,
"step": 7040
},
{
"epoch": 1.15,
"learning_rate": 1.271646478241364e-05,
"loss": 0.501,
"step": 7050
},
{
"epoch": 1.15,
"learning_rate": 1.2705248990578736e-05,
"loss": 0.4924,
"step": 7060
},
{
"epoch": 1.15,
"learning_rate": 1.269403319874383e-05,
"loss": 0.4927,
"step": 7070
},
{
"epoch": 1.16,
"learning_rate": 1.2682817406908929e-05,
"loss": 0.5003,
"step": 7080
},
{
"epoch": 1.16,
"learning_rate": 1.2671601615074025e-05,
"loss": 0.4894,
"step": 7090
},
{
"epoch": 1.16,
"learning_rate": 1.2660385823239122e-05,
"loss": 0.4939,
"step": 7100
},
{
"epoch": 1.16,
"learning_rate": 1.264917003140422e-05,
"loss": 0.4937,
"step": 7110
},
{
"epoch": 1.16,
"learning_rate": 1.2637954239569314e-05,
"loss": 0.4839,
"step": 7120
},
{
"epoch": 1.16,
"learning_rate": 1.262673844773441e-05,
"loss": 0.5071,
"step": 7130
},
{
"epoch": 1.17,
"learning_rate": 1.2615522655899509e-05,
"loss": 0.4893,
"step": 7140
},
{
"epoch": 1.17,
"learning_rate": 1.2604306864064603e-05,
"loss": 0.4954,
"step": 7150
},
{
"epoch": 1.17,
"learning_rate": 1.25930910722297e-05,
"loss": 0.4876,
"step": 7160
},
{
"epoch": 1.17,
"learning_rate": 1.2581875280394798e-05,
"loss": 0.5056,
"step": 7170
},
{
"epoch": 1.17,
"learning_rate": 1.2570659488559892e-05,
"loss": 0.4937,
"step": 7180
},
{
"epoch": 1.17,
"learning_rate": 1.2559443696724989e-05,
"loss": 0.4979,
"step": 7190
},
{
"epoch": 1.17,
"learning_rate": 1.2548227904890087e-05,
"loss": 0.4967,
"step": 7200
},
{
"epoch": 1.18,
"learning_rate": 1.2537012113055183e-05,
"loss": 0.4812,
"step": 7210
},
{
"epoch": 1.18,
"learning_rate": 1.2525796321220278e-05,
"loss": 0.4944,
"step": 7220
},
{
"epoch": 1.18,
"learning_rate": 1.2514580529385376e-05,
"loss": 0.4977,
"step": 7230
},
{
"epoch": 1.18,
"learning_rate": 1.2503364737550472e-05,
"loss": 0.4907,
"step": 7240
},
{
"epoch": 1.18,
"learning_rate": 1.2492148945715567e-05,
"loss": 0.4902,
"step": 7250
},
{
"epoch": 1.18,
"learning_rate": 1.2480933153880665e-05,
"loss": 0.4908,
"step": 7260
},
{
"epoch": 1.19,
"learning_rate": 1.2469717362045761e-05,
"loss": 0.5047,
"step": 7270
},
{
"epoch": 1.19,
"learning_rate": 1.2458501570210858e-05,
"loss": 0.4909,
"step": 7280
},
{
"epoch": 1.19,
"learning_rate": 1.2447285778375956e-05,
"loss": 0.4877,
"step": 7290
},
{
"epoch": 1.19,
"learning_rate": 1.243606998654105e-05,
"loss": 0.4972,
"step": 7300
},
{
"epoch": 1.19,
"learning_rate": 1.2424854194706147e-05,
"loss": 0.4952,
"step": 7310
},
{
"epoch": 1.19,
"learning_rate": 1.2413638402871245e-05,
"loss": 0.4881,
"step": 7320
},
{
"epoch": 1.2,
"learning_rate": 1.240242261103634e-05,
"loss": 0.4944,
"step": 7330
},
{
"epoch": 1.2,
"learning_rate": 1.2391206819201436e-05,
"loss": 0.4959,
"step": 7340
},
{
"epoch": 1.2,
"learning_rate": 1.2379991027366534e-05,
"loss": 0.4963,
"step": 7350
},
{
"epoch": 1.2,
"learning_rate": 1.2368775235531628e-05,
"loss": 0.5035,
"step": 7360
},
{
"epoch": 1.2,
"learning_rate": 1.2357559443696725e-05,
"loss": 0.5018,
"step": 7370
},
{
"epoch": 1.2,
"learning_rate": 1.2346343651861823e-05,
"loss": 0.4973,
"step": 7380
},
{
"epoch": 1.21,
"learning_rate": 1.233512786002692e-05,
"loss": 0.4998,
"step": 7390
},
{
"epoch": 1.21,
"learning_rate": 1.2323912068192014e-05,
"loss": 0.5015,
"step": 7400
},
{
"epoch": 1.21,
"learning_rate": 1.2312696276357112e-05,
"loss": 0.4906,
"step": 7410
},
{
"epoch": 1.21,
"learning_rate": 1.2301480484522208e-05,
"loss": 0.4865,
"step": 7420
},
{
"epoch": 1.21,
"learning_rate": 1.2290264692687303e-05,
"loss": 0.4918,
"step": 7430
},
{
"epoch": 1.21,
"learning_rate": 1.2279048900852401e-05,
"loss": 0.4913,
"step": 7440
},
{
"epoch": 1.22,
"learning_rate": 1.2267833109017497e-05,
"loss": 0.4915,
"step": 7450
},
{
"epoch": 1.22,
"learning_rate": 1.2256617317182594e-05,
"loss": 0.5034,
"step": 7460
},
{
"epoch": 1.22,
"learning_rate": 1.2245401525347692e-05,
"loss": 0.478,
"step": 7470
},
{
"epoch": 1.22,
"learning_rate": 1.2234185733512786e-05,
"loss": 0.4911,
"step": 7480
},
{
"epoch": 1.22,
"learning_rate": 1.2222969941677883e-05,
"loss": 0.4918,
"step": 7490
},
{
"epoch": 1.22,
"learning_rate": 1.2211754149842981e-05,
"loss": 0.4844,
"step": 7500
},
{
"epoch": 1.23,
"learning_rate": 1.2200538358008076e-05,
"loss": 0.488,
"step": 7510
},
{
"epoch": 1.23,
"learning_rate": 1.2189322566173172e-05,
"loss": 0.4928,
"step": 7520
},
{
"epoch": 1.23,
"learning_rate": 1.217810677433827e-05,
"loss": 0.4913,
"step": 7530
},
{
"epoch": 1.23,
"learning_rate": 1.2166890982503365e-05,
"loss": 0.4841,
"step": 7540
},
{
"epoch": 1.23,
"learning_rate": 1.2155675190668461e-05,
"loss": 0.4758,
"step": 7550
},
{
"epoch": 1.23,
"learning_rate": 1.2144459398833559e-05,
"loss": 0.4977,
"step": 7560
},
{
"epoch": 1.24,
"learning_rate": 1.2133243606998655e-05,
"loss": 0.4991,
"step": 7570
},
{
"epoch": 1.24,
"learning_rate": 1.212202781516375e-05,
"loss": 0.5023,
"step": 7580
},
{
"epoch": 1.24,
"learning_rate": 1.2110812023328848e-05,
"loss": 0.4849,
"step": 7590
},
{
"epoch": 1.24,
"learning_rate": 1.2099596231493944e-05,
"loss": 0.4867,
"step": 7600
},
{
"epoch": 1.24,
"learning_rate": 1.2088380439659039e-05,
"loss": 0.4962,
"step": 7610
},
{
"epoch": 1.24,
"learning_rate": 1.2077164647824137e-05,
"loss": 0.4983,
"step": 7620
},
{
"epoch": 1.25,
"learning_rate": 1.2065948855989234e-05,
"loss": 0.4894,
"step": 7630
},
{
"epoch": 1.25,
"learning_rate": 1.205473306415433e-05,
"loss": 0.4896,
"step": 7640
},
{
"epoch": 1.25,
"learning_rate": 1.2043517272319428e-05,
"loss": 0.4961,
"step": 7650
},
{
"epoch": 1.25,
"learning_rate": 1.2032301480484523e-05,
"loss": 0.4857,
"step": 7660
},
{
"epoch": 1.25,
"learning_rate": 1.2021085688649619e-05,
"loss": 0.491,
"step": 7670
},
{
"epoch": 1.25,
"learning_rate": 1.2009869896814717e-05,
"loss": 0.4829,
"step": 7680
},
{
"epoch": 1.25,
"learning_rate": 1.1998654104979812e-05,
"loss": 0.4778,
"step": 7690
},
{
"epoch": 1.26,
"learning_rate": 1.1987438313144908e-05,
"loss": 0.4795,
"step": 7700
},
{
"epoch": 1.26,
"learning_rate": 1.1976222521310006e-05,
"loss": 0.4923,
"step": 7710
},
{
"epoch": 1.26,
"learning_rate": 1.19650067294751e-05,
"loss": 0.4903,
"step": 7720
},
{
"epoch": 1.26,
"learning_rate": 1.1953790937640197e-05,
"loss": 0.5006,
"step": 7730
},
{
"epoch": 1.26,
"learning_rate": 1.1942575145805295e-05,
"loss": 0.4916,
"step": 7740
},
{
"epoch": 1.26,
"learning_rate": 1.1931359353970392e-05,
"loss": 0.496,
"step": 7750
},
{
"epoch": 1.27,
"learning_rate": 1.192014356213549e-05,
"loss": 0.4908,
"step": 7760
},
{
"epoch": 1.27,
"learning_rate": 1.1908927770300584e-05,
"loss": 0.4964,
"step": 7770
},
{
"epoch": 1.27,
"learning_rate": 1.189771197846568e-05,
"loss": 0.4954,
"step": 7780
},
{
"epoch": 1.27,
"learning_rate": 1.1886496186630779e-05,
"loss": 0.4817,
"step": 7790
},
{
"epoch": 1.27,
"learning_rate": 1.1875280394795873e-05,
"loss": 0.4917,
"step": 7800
},
{
"epoch": 1.27,
"learning_rate": 1.186406460296097e-05,
"loss": 0.4877,
"step": 7810
},
{
"epoch": 1.28,
"learning_rate": 1.1852848811126068e-05,
"loss": 0.488,
"step": 7820
},
{
"epoch": 1.28,
"learning_rate": 1.1841633019291164e-05,
"loss": 0.4982,
"step": 7830
},
{
"epoch": 1.28,
"learning_rate": 1.1830417227456259e-05,
"loss": 0.4874,
"step": 7840
},
{
"epoch": 1.28,
"learning_rate": 1.1819201435621357e-05,
"loss": 0.4925,
"step": 7850
},
{
"epoch": 1.28,
"learning_rate": 1.1807985643786453e-05,
"loss": 0.499,
"step": 7860
},
{
"epoch": 1.28,
"learning_rate": 1.1796769851951548e-05,
"loss": 0.4916,
"step": 7870
},
{
"epoch": 1.29,
"learning_rate": 1.1785554060116646e-05,
"loss": 0.4835,
"step": 7880
},
{
"epoch": 1.29,
"learning_rate": 1.1774338268281742e-05,
"loss": 0.4978,
"step": 7890
},
{
"epoch": 1.29,
"learning_rate": 1.1763122476446837e-05,
"loss": 0.4952,
"step": 7900
},
{
"epoch": 1.29,
"learning_rate": 1.1751906684611935e-05,
"loss": 0.4839,
"step": 7910
},
{
"epoch": 1.29,
"learning_rate": 1.1740690892777031e-05,
"loss": 0.4841,
"step": 7920
},
{
"epoch": 1.29,
"learning_rate": 1.1729475100942128e-05,
"loss": 0.4884,
"step": 7930
},
{
"epoch": 1.3,
"learning_rate": 1.1718259309107226e-05,
"loss": 0.4948,
"step": 7940
},
{
"epoch": 1.3,
"learning_rate": 1.170704351727232e-05,
"loss": 0.4888,
"step": 7950
},
{
"epoch": 1.3,
"learning_rate": 1.1695827725437417e-05,
"loss": 0.4922,
"step": 7960
},
{
"epoch": 1.3,
"learning_rate": 1.1684611933602515e-05,
"loss": 0.4939,
"step": 7970
},
{
"epoch": 1.3,
"learning_rate": 1.167339614176761e-05,
"loss": 0.4905,
"step": 7980
},
{
"epoch": 1.3,
"learning_rate": 1.1662180349932706e-05,
"loss": 0.4913,
"step": 7990
},
{
"epoch": 1.31,
"learning_rate": 1.1650964558097804e-05,
"loss": 0.4926,
"step": 8000
},
{
"epoch": 1.31,
"learning_rate": 1.16397487662629e-05,
"loss": 0.496,
"step": 8010
},
{
"epoch": 1.31,
"learning_rate": 1.1628532974427995e-05,
"loss": 0.4878,
"step": 8020
},
{
"epoch": 1.31,
"learning_rate": 1.1617317182593093e-05,
"loss": 0.4944,
"step": 8030
},
{
"epoch": 1.31,
"learning_rate": 1.160610139075819e-05,
"loss": 0.4954,
"step": 8040
},
{
"epoch": 1.31,
"learning_rate": 1.1594885598923284e-05,
"loss": 0.502,
"step": 8050
},
{
"epoch": 1.32,
"learning_rate": 1.1583669807088382e-05,
"loss": 0.4789,
"step": 8060
},
{
"epoch": 1.32,
"learning_rate": 1.1572454015253478e-05,
"loss": 0.4862,
"step": 8070
},
{
"epoch": 1.32,
"learning_rate": 1.1561238223418573e-05,
"loss": 0.5012,
"step": 8080
},
{
"epoch": 1.32,
"learning_rate": 1.1550022431583671e-05,
"loss": 0.4831,
"step": 8090
},
{
"epoch": 1.32,
"learning_rate": 1.1538806639748767e-05,
"loss": 0.4823,
"step": 8100
},
{
"epoch": 1.32,
"learning_rate": 1.1527590847913864e-05,
"loss": 0.4948,
"step": 8110
},
{
"epoch": 1.33,
"learning_rate": 1.1516375056078962e-05,
"loss": 0.4874,
"step": 8120
},
{
"epoch": 1.33,
"learning_rate": 1.1505159264244056e-05,
"loss": 0.4847,
"step": 8130
},
{
"epoch": 1.33,
"learning_rate": 1.1493943472409153e-05,
"loss": 0.4985,
"step": 8140
},
{
"epoch": 1.33,
"learning_rate": 1.1482727680574251e-05,
"loss": 0.4889,
"step": 8150
},
{
"epoch": 1.33,
"learning_rate": 1.1471511888739346e-05,
"loss": 0.493,
"step": 8160
},
{
"epoch": 1.33,
"learning_rate": 1.1460296096904442e-05,
"loss": 0.4945,
"step": 8170
},
{
"epoch": 1.33,
"learning_rate": 1.144908030506954e-05,
"loss": 0.4813,
"step": 8180
},
{
"epoch": 1.34,
"learning_rate": 1.1437864513234636e-05,
"loss": 0.487,
"step": 8190
},
{
"epoch": 1.34,
"learning_rate": 1.1426648721399731e-05,
"loss": 0.4875,
"step": 8200
},
{
"epoch": 1.34,
"learning_rate": 1.1415432929564829e-05,
"loss": 0.4893,
"step": 8210
},
{
"epoch": 1.34,
"learning_rate": 1.1404217137729925e-05,
"loss": 0.4984,
"step": 8220
},
{
"epoch": 1.34,
"learning_rate": 1.139300134589502e-05,
"loss": 0.5001,
"step": 8230
},
{
"epoch": 1.34,
"learning_rate": 1.1381785554060118e-05,
"loss": 0.4866,
"step": 8240
},
{
"epoch": 1.35,
"learning_rate": 1.1370569762225214e-05,
"loss": 0.4904,
"step": 8250
},
{
"epoch": 1.35,
"learning_rate": 1.135935397039031e-05,
"loss": 0.4808,
"step": 8260
},
{
"epoch": 1.35,
"learning_rate": 1.1348138178555407e-05,
"loss": 0.4902,
"step": 8270
},
{
"epoch": 1.35,
"learning_rate": 1.1336922386720504e-05,
"loss": 0.4877,
"step": 8280
},
{
"epoch": 1.35,
"learning_rate": 1.13257065948856e-05,
"loss": 0.4904,
"step": 8290
},
{
"epoch": 1.35,
"learning_rate": 1.1314490803050698e-05,
"loss": 0.4923,
"step": 8300
},
{
"epoch": 1.36,
"learning_rate": 1.1303275011215793e-05,
"loss": 0.4984,
"step": 8310
},
{
"epoch": 1.36,
"learning_rate": 1.1292059219380889e-05,
"loss": 0.487,
"step": 8320
},
{
"epoch": 1.36,
"learning_rate": 1.1280843427545987e-05,
"loss": 0.4958,
"step": 8330
},
{
"epoch": 1.36,
"learning_rate": 1.1269627635711082e-05,
"loss": 0.4884,
"step": 8340
},
{
"epoch": 1.36,
"learning_rate": 1.1258411843876178e-05,
"loss": 0.4885,
"step": 8350
},
{
"epoch": 1.36,
"learning_rate": 1.1247196052041276e-05,
"loss": 0.4871,
"step": 8360
},
{
"epoch": 1.37,
"learning_rate": 1.1235980260206372e-05,
"loss": 0.4961,
"step": 8370
},
{
"epoch": 1.37,
"learning_rate": 1.1224764468371467e-05,
"loss": 0.4842,
"step": 8380
},
{
"epoch": 1.37,
"learning_rate": 1.1213548676536565e-05,
"loss": 0.4891,
"step": 8390
},
{
"epoch": 1.37,
"learning_rate": 1.1202332884701661e-05,
"loss": 0.4836,
"step": 8400
},
{
"epoch": 1.37,
"learning_rate": 1.1191117092866756e-05,
"loss": 0.4893,
"step": 8410
},
{
"epoch": 1.37,
"learning_rate": 1.1179901301031854e-05,
"loss": 0.4785,
"step": 8420
},
{
"epoch": 1.38,
"learning_rate": 1.116868550919695e-05,
"loss": 0.4926,
"step": 8430
},
{
"epoch": 1.38,
"learning_rate": 1.1157469717362047e-05,
"loss": 0.4804,
"step": 8440
},
{
"epoch": 1.38,
"learning_rate": 1.1146253925527143e-05,
"loss": 0.4888,
"step": 8450
},
{
"epoch": 1.38,
"learning_rate": 1.113503813369224e-05,
"loss": 0.5088,
"step": 8460
},
{
"epoch": 1.38,
"learning_rate": 1.1123822341857336e-05,
"loss": 0.4904,
"step": 8470
},
{
"epoch": 1.38,
"learning_rate": 1.1112606550022434e-05,
"loss": 0.4913,
"step": 8480
},
{
"epoch": 1.39,
"learning_rate": 1.1101390758187529e-05,
"loss": 0.4901,
"step": 8490
},
{
"epoch": 1.39,
"learning_rate": 1.1090174966352625e-05,
"loss": 0.4887,
"step": 8500
},
{
"epoch": 1.39,
"learning_rate": 1.1078959174517723e-05,
"loss": 0.4816,
"step": 8510
},
{
"epoch": 1.39,
"learning_rate": 1.1067743382682818e-05,
"loss": 0.4989,
"step": 8520
},
{
"epoch": 1.39,
"learning_rate": 1.1056527590847914e-05,
"loss": 0.4807,
"step": 8530
},
{
"epoch": 1.39,
"learning_rate": 1.1045311799013012e-05,
"loss": 0.4929,
"step": 8540
},
{
"epoch": 1.4,
"learning_rate": 1.1034096007178109e-05,
"loss": 0.4752,
"step": 8550
},
{
"epoch": 1.4,
"learning_rate": 1.1022880215343203e-05,
"loss": 0.4822,
"step": 8560
},
{
"epoch": 1.4,
"learning_rate": 1.1011664423508301e-05,
"loss": 0.491,
"step": 8570
},
{
"epoch": 1.4,
"learning_rate": 1.1000448631673398e-05,
"loss": 0.4987,
"step": 8580
},
{
"epoch": 1.4,
"learning_rate": 1.0989232839838492e-05,
"loss": 0.4867,
"step": 8590
},
{
"epoch": 1.4,
"learning_rate": 1.097801704800359e-05,
"loss": 0.493,
"step": 8600
},
{
"epoch": 1.41,
"learning_rate": 1.0966801256168687e-05,
"loss": 0.5023,
"step": 8610
},
{
"epoch": 1.41,
"learning_rate": 1.0955585464333783e-05,
"loss": 0.4931,
"step": 8620
},
{
"epoch": 1.41,
"learning_rate": 1.094436967249888e-05,
"loss": 0.4875,
"step": 8630
},
{
"epoch": 1.41,
"learning_rate": 1.0933153880663976e-05,
"loss": 0.4818,
"step": 8640
},
{
"epoch": 1.41,
"learning_rate": 1.0921938088829072e-05,
"loss": 0.4828,
"step": 8650
},
{
"epoch": 1.41,
"learning_rate": 1.091072229699417e-05,
"loss": 0.4897,
"step": 8660
},
{
"epoch": 1.41,
"learning_rate": 1.0899506505159265e-05,
"loss": 0.48,
"step": 8670
},
{
"epoch": 1.42,
"learning_rate": 1.0888290713324361e-05,
"loss": 0.4865,
"step": 8680
},
{
"epoch": 1.42,
"learning_rate": 1.087707492148946e-05,
"loss": 0.4941,
"step": 8690
},
{
"epoch": 1.42,
"learning_rate": 1.0865859129654554e-05,
"loss": 0.4858,
"step": 8700
},
{
"epoch": 1.42,
"learning_rate": 1.085464333781965e-05,
"loss": 0.4864,
"step": 8710
},
{
"epoch": 1.42,
"learning_rate": 1.0843427545984748e-05,
"loss": 0.4864,
"step": 8720
},
{
"epoch": 1.42,
"learning_rate": 1.0832211754149845e-05,
"loss": 0.5011,
"step": 8730
},
{
"epoch": 1.43,
"learning_rate": 1.082099596231494e-05,
"loss": 0.4841,
"step": 8740
},
{
"epoch": 1.43,
"learning_rate": 1.0809780170480037e-05,
"loss": 0.4966,
"step": 8750
},
{
"epoch": 1.43,
"learning_rate": 1.0798564378645134e-05,
"loss": 0.5004,
"step": 8760
},
{
"epoch": 1.43,
"learning_rate": 1.0787348586810228e-05,
"loss": 0.4976,
"step": 8770
},
{
"epoch": 1.43,
"learning_rate": 1.0776132794975326e-05,
"loss": 0.481,
"step": 8780
},
{
"epoch": 1.43,
"learning_rate": 1.0764917003140423e-05,
"loss": 0.4866,
"step": 8790
},
{
"epoch": 1.44,
"learning_rate": 1.075370121130552e-05,
"loss": 0.496,
"step": 8800
},
{
"epoch": 1.44,
"learning_rate": 1.0742485419470616e-05,
"loss": 0.492,
"step": 8810
},
{
"epoch": 1.44,
"learning_rate": 1.0731269627635712e-05,
"loss": 0.4819,
"step": 8820
},
{
"epoch": 1.44,
"learning_rate": 1.0720053835800808e-05,
"loss": 0.492,
"step": 8830
},
{
"epoch": 1.44,
"learning_rate": 1.0708838043965906e-05,
"loss": 0.4818,
"step": 8840
},
{
"epoch": 1.44,
"learning_rate": 1.0697622252131001e-05,
"loss": 0.4845,
"step": 8850
},
{
"epoch": 1.45,
"learning_rate": 1.0686406460296097e-05,
"loss": 0.5005,
"step": 8860
},
{
"epoch": 1.45,
"learning_rate": 1.0675190668461195e-05,
"loss": 0.4761,
"step": 8870
},
{
"epoch": 1.45,
"learning_rate": 1.066397487662629e-05,
"loss": 0.4906,
"step": 8880
},
{
"epoch": 1.45,
"learning_rate": 1.0652759084791386e-05,
"loss": 0.4873,
"step": 8890
},
{
"epoch": 1.45,
"learning_rate": 1.0641543292956484e-05,
"loss": 0.4991,
"step": 8900
},
{
"epoch": 1.45,
"learning_rate": 1.063032750112158e-05,
"loss": 0.491,
"step": 8910
},
{
"epoch": 1.46,
"learning_rate": 1.0619111709286675e-05,
"loss": 0.4937,
"step": 8920
},
{
"epoch": 1.46,
"learning_rate": 1.0607895917451774e-05,
"loss": 0.482,
"step": 8930
},
{
"epoch": 1.46,
"learning_rate": 1.059668012561687e-05,
"loss": 0.4956,
"step": 8940
},
{
"epoch": 1.46,
"learning_rate": 1.0585464333781965e-05,
"loss": 0.4852,
"step": 8950
},
{
"epoch": 1.46,
"learning_rate": 1.0574248541947063e-05,
"loss": 0.4891,
"step": 8960
},
{
"epoch": 1.46,
"learning_rate": 1.0563032750112159e-05,
"loss": 0.4873,
"step": 8970
},
{
"epoch": 1.47,
"learning_rate": 1.0551816958277255e-05,
"loss": 0.4929,
"step": 8980
},
{
"epoch": 1.47,
"learning_rate": 1.0540601166442352e-05,
"loss": 0.4868,
"step": 8990
},
{
"epoch": 1.47,
"learning_rate": 1.0529385374607448e-05,
"loss": 0.4931,
"step": 9000
},
{
"epoch": 1.47,
"learning_rate": 1.0518169582772544e-05,
"loss": 0.4922,
"step": 9010
},
{
"epoch": 1.47,
"learning_rate": 1.0506953790937642e-05,
"loss": 0.4791,
"step": 9020
},
{
"epoch": 1.47,
"learning_rate": 1.0495737999102737e-05,
"loss": 0.4915,
"step": 9030
},
{
"epoch": 1.48,
"learning_rate": 1.0484522207267833e-05,
"loss": 0.4853,
"step": 9040
},
{
"epoch": 1.48,
"learning_rate": 1.0473306415432931e-05,
"loss": 0.4826,
"step": 9050
},
{
"epoch": 1.48,
"learning_rate": 1.0462090623598026e-05,
"loss": 0.4948,
"step": 9060
},
{
"epoch": 1.48,
"learning_rate": 1.0450874831763123e-05,
"loss": 0.4932,
"step": 9070
},
{
"epoch": 1.48,
"learning_rate": 1.043965903992822e-05,
"loss": 0.4855,
"step": 9080
},
{
"epoch": 1.48,
"learning_rate": 1.0428443248093317e-05,
"loss": 0.4912,
"step": 9090
},
{
"epoch": 1.48,
"learning_rate": 1.0417227456258412e-05,
"loss": 0.4895,
"step": 9100
},
{
"epoch": 1.49,
"learning_rate": 1.040601166442351e-05,
"loss": 0.4907,
"step": 9110
},
{
"epoch": 1.49,
"learning_rate": 1.0394795872588606e-05,
"loss": 0.486,
"step": 9120
},
{
"epoch": 1.49,
"learning_rate": 1.03835800807537e-05,
"loss": 0.4858,
"step": 9130
},
{
"epoch": 1.49,
"learning_rate": 1.0372364288918799e-05,
"loss": 0.4949,
"step": 9140
},
{
"epoch": 1.49,
"learning_rate": 1.0361148497083895e-05,
"loss": 0.4826,
"step": 9150
},
{
"epoch": 1.49,
"learning_rate": 1.0349932705248991e-05,
"loss": 0.4847,
"step": 9160
},
{
"epoch": 1.5,
"learning_rate": 1.0338716913414088e-05,
"loss": 0.4926,
"step": 9170
},
{
"epoch": 1.5,
"learning_rate": 1.0327501121579184e-05,
"loss": 0.4815,
"step": 9180
},
{
"epoch": 1.5,
"learning_rate": 1.031628532974428e-05,
"loss": 0.489,
"step": 9190
},
{
"epoch": 1.5,
"learning_rate": 1.0305069537909379e-05,
"loss": 0.4925,
"step": 9200
},
{
"epoch": 1.5,
"learning_rate": 1.0293853746074473e-05,
"loss": 0.4765,
"step": 9210
},
{
"epoch": 1.5,
"learning_rate": 1.028263795423957e-05,
"loss": 0.4857,
"step": 9220
},
{
"epoch": 1.51,
"learning_rate": 1.0271422162404668e-05,
"loss": 0.4905,
"step": 9230
},
{
"epoch": 1.51,
"learning_rate": 1.0260206370569762e-05,
"loss": 0.4887,
"step": 9240
},
{
"epoch": 1.51,
"learning_rate": 1.0248990578734859e-05,
"loss": 0.4823,
"step": 9250
},
{
"epoch": 1.51,
"learning_rate": 1.0237774786899957e-05,
"loss": 0.4873,
"step": 9260
},
{
"epoch": 1.51,
"learning_rate": 1.0226558995065053e-05,
"loss": 0.4931,
"step": 9270
},
{
"epoch": 1.51,
"learning_rate": 1.0215343203230148e-05,
"loss": 0.4917,
"step": 9280
},
{
"epoch": 1.52,
"learning_rate": 1.0204127411395246e-05,
"loss": 0.4895,
"step": 9290
},
{
"epoch": 1.52,
"learning_rate": 1.0192911619560342e-05,
"loss": 0.4763,
"step": 9300
},
{
"epoch": 1.52,
"learning_rate": 1.0181695827725437e-05,
"loss": 0.4917,
"step": 9310
},
{
"epoch": 1.52,
"learning_rate": 1.0170480035890535e-05,
"loss": 0.4755,
"step": 9320
},
{
"epoch": 1.52,
"learning_rate": 1.0159264244055631e-05,
"loss": 0.503,
"step": 9330
},
{
"epoch": 1.52,
"learning_rate": 1.0148048452220728e-05,
"loss": 0.4761,
"step": 9340
},
{
"epoch": 1.53,
"learning_rate": 1.0136832660385824e-05,
"loss": 0.479,
"step": 9350
},
{
"epoch": 1.53,
"learning_rate": 1.012561686855092e-05,
"loss": 0.4863,
"step": 9360
},
{
"epoch": 1.53,
"learning_rate": 1.0114401076716017e-05,
"loss": 0.491,
"step": 9370
},
{
"epoch": 1.53,
"learning_rate": 1.0103185284881115e-05,
"loss": 0.4828,
"step": 9380
},
{
"epoch": 1.53,
"learning_rate": 1.009196949304621e-05,
"loss": 0.4839,
"step": 9390
},
{
"epoch": 1.53,
"learning_rate": 1.0080753701211306e-05,
"loss": 0.4931,
"step": 9400
},
{
"epoch": 1.54,
"learning_rate": 1.0069537909376404e-05,
"loss": 0.4743,
"step": 9410
},
{
"epoch": 1.54,
"learning_rate": 1.0058322117541498e-05,
"loss": 0.4825,
"step": 9420
},
{
"epoch": 1.54,
"learning_rate": 1.0047106325706595e-05,
"loss": 0.4883,
"step": 9430
},
{
"epoch": 1.54,
"learning_rate": 1.0035890533871693e-05,
"loss": 0.4772,
"step": 9440
},
{
"epoch": 1.54,
"learning_rate": 1.002467474203679e-05,
"loss": 0.4868,
"step": 9450
},
{
"epoch": 1.54,
"learning_rate": 1.0013458950201884e-05,
"loss": 0.4832,
"step": 9460
},
{
"epoch": 1.55,
"learning_rate": 1.0002243158366982e-05,
"loss": 0.4858,
"step": 9470
},
{
"epoch": 1.55,
"learning_rate": 9.991027366532078e-06,
"loss": 0.4846,
"step": 9480
},
{
"epoch": 1.55,
"learning_rate": 9.979811574697175e-06,
"loss": 0.486,
"step": 9490
},
{
"epoch": 1.55,
"learning_rate": 9.968595782862271e-06,
"loss": 0.487,
"step": 9500
},
{
"epoch": 1.55,
"learning_rate": 9.957379991027367e-06,
"loss": 0.4783,
"step": 9510
},
{
"epoch": 1.55,
"learning_rate": 9.946164199192464e-06,
"loss": 0.4837,
"step": 9520
},
{
"epoch": 1.56,
"learning_rate": 9.93494840735756e-06,
"loss": 0.4794,
"step": 9530
},
{
"epoch": 1.56,
"learning_rate": 9.923732615522656e-06,
"loss": 0.4787,
"step": 9540
},
{
"epoch": 1.56,
"learning_rate": 9.912516823687753e-06,
"loss": 0.4906,
"step": 9550
},
{
"epoch": 1.56,
"learning_rate": 9.901301031852849e-06,
"loss": 0.4844,
"step": 9560
},
{
"epoch": 1.56,
"learning_rate": 9.890085240017945e-06,
"loss": 0.4828,
"step": 9570
},
{
"epoch": 1.56,
"learning_rate": 9.878869448183044e-06,
"loss": 0.4763,
"step": 9580
},
{
"epoch": 1.56,
"learning_rate": 9.867653656348138e-06,
"loss": 0.4879,
"step": 9590
},
{
"epoch": 1.57,
"learning_rate": 9.856437864513235e-06,
"loss": 0.4978,
"step": 9600
},
{
"epoch": 1.57,
"learning_rate": 9.845222072678333e-06,
"loss": 0.4805,
"step": 9610
},
{
"epoch": 1.57,
"learning_rate": 9.834006280843429e-06,
"loss": 0.4924,
"step": 9620
},
{
"epoch": 1.57,
"learning_rate": 9.822790489008525e-06,
"loss": 0.4909,
"step": 9630
},
{
"epoch": 1.57,
"learning_rate": 9.811574697173622e-06,
"loss": 0.4831,
"step": 9640
},
{
"epoch": 1.57,
"learning_rate": 9.800358905338718e-06,
"loss": 0.485,
"step": 9650
},
{
"epoch": 1.58,
"learning_rate": 9.789143113503814e-06,
"loss": 0.4802,
"step": 9660
},
{
"epoch": 1.58,
"learning_rate": 9.77792732166891e-06,
"loss": 0.4951,
"step": 9670
},
{
"epoch": 1.58,
"learning_rate": 9.766711529834007e-06,
"loss": 0.4784,
"step": 9680
},
{
"epoch": 1.58,
"learning_rate": 9.755495737999103e-06,
"loss": 0.4788,
"step": 9690
},
{
"epoch": 1.58,
"learning_rate": 9.7442799461642e-06,
"loss": 0.4798,
"step": 9700
},
{
"epoch": 1.58,
"learning_rate": 9.733064154329296e-06,
"loss": 0.4858,
"step": 9710
},
{
"epoch": 1.59,
"learning_rate": 9.721848362494393e-06,
"loss": 0.4845,
"step": 9720
},
{
"epoch": 1.59,
"learning_rate": 9.710632570659489e-06,
"loss": 0.4871,
"step": 9730
},
{
"epoch": 1.59,
"learning_rate": 9.699416778824587e-06,
"loss": 0.4809,
"step": 9740
},
{
"epoch": 1.59,
"learning_rate": 9.688200986989682e-06,
"loss": 0.4902,
"step": 9750
},
{
"epoch": 1.59,
"learning_rate": 9.67698519515478e-06,
"loss": 0.4961,
"step": 9760
},
{
"epoch": 1.59,
"learning_rate": 9.665769403319876e-06,
"loss": 0.4749,
"step": 9770
},
{
"epoch": 1.6,
"learning_rate": 9.65455361148497e-06,
"loss": 0.4922,
"step": 9780
},
{
"epoch": 1.6,
"learning_rate": 9.643337819650069e-06,
"loss": 0.4824,
"step": 9790
},
{
"epoch": 1.6,
"learning_rate": 9.632122027815165e-06,
"loss": 0.4853,
"step": 9800
},
{
"epoch": 1.6,
"learning_rate": 9.620906235980261e-06,
"loss": 0.4808,
"step": 9810
},
{
"epoch": 1.6,
"learning_rate": 9.609690444145358e-06,
"loss": 0.4878,
"step": 9820
},
{
"epoch": 1.6,
"learning_rate": 9.598474652310454e-06,
"loss": 0.4847,
"step": 9830
},
{
"epoch": 1.61,
"learning_rate": 9.58725886047555e-06,
"loss": 0.4722,
"step": 9840
},
{
"epoch": 1.61,
"learning_rate": 9.576043068640647e-06,
"loss": 0.492,
"step": 9850
},
{
"epoch": 1.61,
"learning_rate": 9.564827276805743e-06,
"loss": 0.4868,
"step": 9860
},
{
"epoch": 1.61,
"learning_rate": 9.55361148497084e-06,
"loss": 0.4877,
"step": 9870
},
{
"epoch": 1.61,
"learning_rate": 9.542395693135936e-06,
"loss": 0.4925,
"step": 9880
},
{
"epoch": 1.61,
"learning_rate": 9.531179901301032e-06,
"loss": 0.487,
"step": 9890
},
{
"epoch": 1.62,
"learning_rate": 9.519964109466129e-06,
"loss": 0.477,
"step": 9900
},
{
"epoch": 1.62,
"learning_rate": 9.508748317631225e-06,
"loss": 0.4781,
"step": 9910
},
{
"epoch": 1.62,
"learning_rate": 9.497532525796323e-06,
"loss": 0.4943,
"step": 9920
},
{
"epoch": 1.62,
"learning_rate": 9.486316733961418e-06,
"loss": 0.4877,
"step": 9930
},
{
"epoch": 1.62,
"learning_rate": 9.475100942126516e-06,
"loss": 0.4861,
"step": 9940
},
{
"epoch": 1.62,
"learning_rate": 9.463885150291612e-06,
"loss": 0.4761,
"step": 9950
},
{
"epoch": 1.63,
"learning_rate": 9.452669358456707e-06,
"loss": 0.4803,
"step": 9960
},
{
"epoch": 1.63,
"learning_rate": 9.441453566621805e-06,
"loss": 0.4799,
"step": 9970
},
{
"epoch": 1.63,
"learning_rate": 9.430237774786901e-06,
"loss": 0.4706,
"step": 9980
},
{
"epoch": 1.63,
"learning_rate": 9.419021982951998e-06,
"loss": 0.4929,
"step": 9990
},
{
"epoch": 1.63,
"learning_rate": 9.407806191117094e-06,
"loss": 0.4891,
"step": 10000
},
{
"epoch": 1.63,
"learning_rate": 9.39659039928219e-06,
"loss": 0.4708,
"step": 10010
},
{
"epoch": 1.64,
"learning_rate": 9.385374607447287e-06,
"loss": 0.4793,
"step": 10020
},
{
"epoch": 1.64,
"learning_rate": 9.374158815612383e-06,
"loss": 0.4819,
"step": 10030
},
{
"epoch": 1.64,
"learning_rate": 9.36294302377748e-06,
"loss": 0.4805,
"step": 10040
},
{
"epoch": 1.64,
"learning_rate": 9.351727231942576e-06,
"loss": 0.4882,
"step": 10050
},
{
"epoch": 1.64,
"learning_rate": 9.340511440107672e-06,
"loss": 0.4785,
"step": 10060
},
{
"epoch": 1.64,
"learning_rate": 9.329295648272768e-06,
"loss": 0.4856,
"step": 10070
},
{
"epoch": 1.64,
"learning_rate": 9.318079856437865e-06,
"loss": 0.4885,
"step": 10080
},
{
"epoch": 1.65,
"learning_rate": 9.306864064602961e-06,
"loss": 0.4737,
"step": 10090
},
{
"epoch": 1.65,
"learning_rate": 9.29564827276806e-06,
"loss": 0.4882,
"step": 10100
},
{
"epoch": 1.65,
"learning_rate": 9.284432480933154e-06,
"loss": 0.4874,
"step": 10110
},
{
"epoch": 1.65,
"learning_rate": 9.273216689098252e-06,
"loss": 0.4861,
"step": 10120
},
{
"epoch": 1.65,
"learning_rate": 9.262000897263348e-06,
"loss": 0.4882,
"step": 10130
},
{
"epoch": 1.65,
"learning_rate": 9.250785105428443e-06,
"loss": 0.4722,
"step": 10140
},
{
"epoch": 1.66,
"learning_rate": 9.239569313593541e-06,
"loss": 0.4803,
"step": 10150
},
{
"epoch": 1.66,
"learning_rate": 9.228353521758637e-06,
"loss": 0.467,
"step": 10160
},
{
"epoch": 1.66,
"learning_rate": 9.217137729923734e-06,
"loss": 0.4861,
"step": 10170
},
{
"epoch": 1.66,
"learning_rate": 9.20592193808883e-06,
"loss": 0.4878,
"step": 10180
},
{
"epoch": 1.66,
"learning_rate": 9.194706146253926e-06,
"loss": 0.4742,
"step": 10190
},
{
"epoch": 1.66,
"learning_rate": 9.183490354419023e-06,
"loss": 0.4746,
"step": 10200
},
{
"epoch": 1.67,
"learning_rate": 9.172274562584119e-06,
"loss": 0.4853,
"step": 10210
},
{
"epoch": 1.67,
"learning_rate": 9.161058770749215e-06,
"loss": 0.4875,
"step": 10220
},
{
"epoch": 1.67,
"learning_rate": 9.149842978914312e-06,
"loss": 0.4843,
"step": 10230
},
{
"epoch": 1.67,
"learning_rate": 9.138627187079408e-06,
"loss": 0.4984,
"step": 10240
},
{
"epoch": 1.67,
"learning_rate": 9.127411395244505e-06,
"loss": 0.485,
"step": 10250
},
{
"epoch": 1.67,
"learning_rate": 9.116195603409601e-06,
"loss": 0.4859,
"step": 10260
},
{
"epoch": 1.68,
"learning_rate": 9.104979811574697e-06,
"loss": 0.4807,
"step": 10270
},
{
"epoch": 1.68,
"learning_rate": 9.093764019739795e-06,
"loss": 0.4842,
"step": 10280
},
{
"epoch": 1.68,
"learning_rate": 9.08254822790489e-06,
"loss": 0.4626,
"step": 10290
},
{
"epoch": 1.68,
"learning_rate": 9.071332436069988e-06,
"loss": 0.4593,
"step": 10300
},
{
"epoch": 1.68,
"learning_rate": 9.060116644235084e-06,
"loss": 0.4902,
"step": 10310
},
{
"epoch": 1.68,
"learning_rate": 9.048900852400179e-06,
"loss": 0.4693,
"step": 10320
},
{
"epoch": 1.69,
"learning_rate": 9.037685060565277e-06,
"loss": 0.4949,
"step": 10330
},
{
"epoch": 1.69,
"learning_rate": 9.026469268730373e-06,
"loss": 0.4833,
"step": 10340
},
{
"epoch": 1.69,
"learning_rate": 9.01525347689547e-06,
"loss": 0.4874,
"step": 10350
},
{
"epoch": 1.69,
"learning_rate": 9.004037685060566e-06,
"loss": 0.4766,
"step": 10360
},
{
"epoch": 1.69,
"learning_rate": 8.992821893225663e-06,
"loss": 0.4739,
"step": 10370
},
{
"epoch": 1.69,
"learning_rate": 8.981606101390759e-06,
"loss": 0.4683,
"step": 10380
},
{
"epoch": 1.7,
"learning_rate": 8.970390309555855e-06,
"loss": 0.4887,
"step": 10390
},
{
"epoch": 1.7,
"learning_rate": 8.959174517720952e-06,
"loss": 0.4944,
"step": 10400
},
{
"epoch": 1.7,
"learning_rate": 8.947958725886048e-06,
"loss": 0.4843,
"step": 10410
},
{
"epoch": 1.7,
"learning_rate": 8.936742934051144e-06,
"loss": 0.4733,
"step": 10420
},
{
"epoch": 1.7,
"learning_rate": 8.925527142216242e-06,
"loss": 0.476,
"step": 10430
},
{
"epoch": 1.7,
"learning_rate": 8.914311350381337e-06,
"loss": 0.4806,
"step": 10440
},
{
"epoch": 1.71,
"learning_rate": 8.903095558546433e-06,
"loss": 0.49,
"step": 10450
},
{
"epoch": 1.71,
"learning_rate": 8.891879766711531e-06,
"loss": 0.472,
"step": 10460
},
{
"epoch": 1.71,
"learning_rate": 8.880663974876626e-06,
"loss": 0.4827,
"step": 10470
},
{
"epoch": 1.71,
"learning_rate": 8.869448183041724e-06,
"loss": 0.4752,
"step": 10480
},
{
"epoch": 1.71,
"learning_rate": 8.85823239120682e-06,
"loss": 0.4922,
"step": 10490
},
{
"epoch": 1.71,
"learning_rate": 8.847016599371915e-06,
"loss": 0.487,
"step": 10500
},
{
"epoch": 1.72,
"learning_rate": 8.835800807537013e-06,
"loss": 0.4892,
"step": 10510
},
{
"epoch": 1.72,
"learning_rate": 8.82458501570211e-06,
"loss": 0.4788,
"step": 10520
},
{
"epoch": 1.72,
"learning_rate": 8.813369223867206e-06,
"loss": 0.473,
"step": 10530
},
{
"epoch": 1.72,
"learning_rate": 8.802153432032302e-06,
"loss": 0.4749,
"step": 10540
},
{
"epoch": 1.72,
"learning_rate": 8.790937640197399e-06,
"loss": 0.4792,
"step": 10550
},
{
"epoch": 1.72,
"learning_rate": 8.779721848362495e-06,
"loss": 0.4705,
"step": 10560
},
{
"epoch": 1.72,
"learning_rate": 8.768506056527591e-06,
"loss": 0.4854,
"step": 10570
},
{
"epoch": 1.73,
"learning_rate": 8.757290264692688e-06,
"loss": 0.49,
"step": 10580
},
{
"epoch": 1.73,
"learning_rate": 8.746074472857784e-06,
"loss": 0.4821,
"step": 10590
},
{
"epoch": 1.73,
"learning_rate": 8.73485868102288e-06,
"loss": 0.4762,
"step": 10600
},
{
"epoch": 1.73,
"learning_rate": 8.723642889187978e-06,
"loss": 0.4871,
"step": 10610
},
{
"epoch": 1.73,
"learning_rate": 8.712427097353073e-06,
"loss": 0.4743,
"step": 10620
},
{
"epoch": 1.73,
"learning_rate": 8.70121130551817e-06,
"loss": 0.4749,
"step": 10630
},
{
"epoch": 1.74,
"learning_rate": 8.689995513683268e-06,
"loss": 0.4855,
"step": 10640
},
{
"epoch": 1.74,
"learning_rate": 8.678779721848362e-06,
"loss": 0.4769,
"step": 10650
},
{
"epoch": 1.74,
"learning_rate": 8.66756393001346e-06,
"loss": 0.478,
"step": 10660
},
{
"epoch": 1.74,
"learning_rate": 8.656348138178557e-06,
"loss": 0.4818,
"step": 10670
},
{
"epoch": 1.74,
"learning_rate": 8.645132346343651e-06,
"loss": 0.469,
"step": 10680
},
{
"epoch": 1.74,
"learning_rate": 8.63391655450875e-06,
"loss": 0.4792,
"step": 10690
},
{
"epoch": 1.75,
"learning_rate": 8.622700762673846e-06,
"loss": 0.4864,
"step": 10700
},
{
"epoch": 1.75,
"learning_rate": 8.611484970838942e-06,
"loss": 0.4849,
"step": 10710
},
{
"epoch": 1.75,
"learning_rate": 8.600269179004038e-06,
"loss": 0.4867,
"step": 10720
},
{
"epoch": 1.75,
"learning_rate": 8.589053387169135e-06,
"loss": 0.4735,
"step": 10730
},
{
"epoch": 1.75,
"learning_rate": 8.577837595334231e-06,
"loss": 0.49,
"step": 10740
},
{
"epoch": 1.75,
"learning_rate": 8.566621803499327e-06,
"loss": 0.4854,
"step": 10750
},
{
"epoch": 1.76,
"learning_rate": 8.555406011664424e-06,
"loss": 0.4781,
"step": 10760
},
{
"epoch": 1.76,
"learning_rate": 8.54419021982952e-06,
"loss": 0.4723,
"step": 10770
},
{
"epoch": 1.76,
"learning_rate": 8.532974427994617e-06,
"loss": 0.4835,
"step": 10780
},
{
"epoch": 1.76,
"learning_rate": 8.521758636159715e-06,
"loss": 0.4752,
"step": 10790
},
{
"epoch": 1.76,
"learning_rate": 8.51054284432481e-06,
"loss": 0.4745,
"step": 10800
},
{
"epoch": 1.76,
"learning_rate": 8.499327052489906e-06,
"loss": 0.4869,
"step": 10810
},
{
"epoch": 1.77,
"learning_rate": 8.488111260655004e-06,
"loss": 0.4875,
"step": 10820
},
{
"epoch": 1.77,
"learning_rate": 8.476895468820098e-06,
"loss": 0.4837,
"step": 10830
},
{
"epoch": 1.77,
"learning_rate": 8.465679676985196e-06,
"loss": 0.477,
"step": 10840
},
{
"epoch": 1.77,
"learning_rate": 8.454463885150293e-06,
"loss": 0.4963,
"step": 10850
},
{
"epoch": 1.77,
"learning_rate": 8.443248093315389e-06,
"loss": 0.486,
"step": 10860
},
{
"epoch": 1.77,
"learning_rate": 8.432032301480485e-06,
"loss": 0.4843,
"step": 10870
},
{
"epoch": 1.78,
"learning_rate": 8.420816509645582e-06,
"loss": 0.4866,
"step": 10880
},
{
"epoch": 1.78,
"learning_rate": 8.409600717810678e-06,
"loss": 0.4873,
"step": 10890
},
{
"epoch": 1.78,
"learning_rate": 8.398384925975775e-06,
"loss": 0.4797,
"step": 10900
},
{
"epoch": 1.78,
"learning_rate": 8.387169134140871e-06,
"loss": 0.4702,
"step": 10910
},
{
"epoch": 1.78,
"learning_rate": 8.375953342305967e-06,
"loss": 0.4971,
"step": 10920
},
{
"epoch": 1.78,
"learning_rate": 8.364737550471064e-06,
"loss": 0.4778,
"step": 10930
},
{
"epoch": 1.79,
"learning_rate": 8.35352175863616e-06,
"loss": 0.4836,
"step": 10940
},
{
"epoch": 1.79,
"learning_rate": 8.342305966801258e-06,
"loss": 0.4871,
"step": 10950
},
{
"epoch": 1.79,
"learning_rate": 8.331090174966353e-06,
"loss": 0.4664,
"step": 10960
},
{
"epoch": 1.79,
"learning_rate": 8.31987438313145e-06,
"loss": 0.4833,
"step": 10970
},
{
"epoch": 1.79,
"learning_rate": 8.308658591296547e-06,
"loss": 0.4874,
"step": 10980
},
{
"epoch": 1.79,
"learning_rate": 8.297442799461642e-06,
"loss": 0.4779,
"step": 10990
},
{
"epoch": 1.8,
"learning_rate": 8.28622700762674e-06,
"loss": 0.4789,
"step": 11000
},
{
"epoch": 1.8,
"learning_rate": 8.275011215791836e-06,
"loss": 0.4619,
"step": 11010
},
{
"epoch": 1.8,
"learning_rate": 8.263795423956933e-06,
"loss": 0.4737,
"step": 11020
},
{
"epoch": 1.8,
"learning_rate": 8.252579632122029e-06,
"loss": 0.4787,
"step": 11030
},
{
"epoch": 1.8,
"learning_rate": 8.241363840287125e-06,
"loss": 0.4809,
"step": 11040
},
{
"epoch": 1.8,
"learning_rate": 8.230148048452222e-06,
"loss": 0.479,
"step": 11050
},
{
"epoch": 1.8,
"learning_rate": 8.218932256617318e-06,
"loss": 0.4817,
"step": 11060
},
{
"epoch": 1.81,
"learning_rate": 8.207716464782414e-06,
"loss": 0.4586,
"step": 11070
},
{
"epoch": 1.81,
"learning_rate": 8.19650067294751e-06,
"loss": 0.488,
"step": 11080
},
{
"epoch": 1.81,
"learning_rate": 8.185284881112607e-06,
"loss": 0.4714,
"step": 11090
},
{
"epoch": 1.81,
"learning_rate": 8.174069089277703e-06,
"loss": 0.4828,
"step": 11100
},
{
"epoch": 1.81,
"learning_rate": 8.1628532974428e-06,
"loss": 0.4935,
"step": 11110
},
{
"epoch": 1.81,
"learning_rate": 8.151637505607896e-06,
"loss": 0.4799,
"step": 11120
},
{
"epoch": 1.82,
"learning_rate": 8.140421713772994e-06,
"loss": 0.483,
"step": 11130
},
{
"epoch": 1.82,
"learning_rate": 8.129205921938089e-06,
"loss": 0.4798,
"step": 11140
},
{
"epoch": 1.82,
"learning_rate": 8.117990130103187e-06,
"loss": 0.4727,
"step": 11150
},
{
"epoch": 1.82,
"learning_rate": 8.106774338268283e-06,
"loss": 0.4746,
"step": 11160
},
{
"epoch": 1.82,
"learning_rate": 8.095558546433378e-06,
"loss": 0.4844,
"step": 11170
},
{
"epoch": 1.82,
"learning_rate": 8.084342754598476e-06,
"loss": 0.4718,
"step": 11180
},
{
"epoch": 1.83,
"learning_rate": 8.073126962763572e-06,
"loss": 0.4758,
"step": 11190
},
{
"epoch": 1.83,
"learning_rate": 8.061911170928669e-06,
"loss": 0.4762,
"step": 11200
},
{
"epoch": 1.83,
"learning_rate": 8.050695379093765e-06,
"loss": 0.4765,
"step": 11210
},
{
"epoch": 1.83,
"learning_rate": 8.039479587258861e-06,
"loss": 0.4748,
"step": 11220
},
{
"epoch": 1.83,
"learning_rate": 8.028263795423958e-06,
"loss": 0.481,
"step": 11230
},
{
"epoch": 1.83,
"learning_rate": 8.017048003589054e-06,
"loss": 0.4715,
"step": 11240
},
{
"epoch": 1.84,
"learning_rate": 8.00583221175415e-06,
"loss": 0.4802,
"step": 11250
},
{
"epoch": 1.84,
"learning_rate": 7.994616419919247e-06,
"loss": 0.4812,
"step": 11260
},
{
"epoch": 1.84,
"learning_rate": 7.983400628084343e-06,
"loss": 0.4891,
"step": 11270
},
{
"epoch": 1.84,
"learning_rate": 7.97218483624944e-06,
"loss": 0.4914,
"step": 11280
},
{
"epoch": 1.84,
"learning_rate": 7.960969044414536e-06,
"loss": 0.4749,
"step": 11290
},
{
"epoch": 1.84,
"learning_rate": 7.949753252579632e-06,
"loss": 0.4697,
"step": 11300
},
{
"epoch": 1.85,
"learning_rate": 7.93853746074473e-06,
"loss": 0.4848,
"step": 11310
},
{
"epoch": 1.85,
"learning_rate": 7.927321668909825e-06,
"loss": 0.4861,
"step": 11320
},
{
"epoch": 1.85,
"learning_rate": 7.916105877074923e-06,
"loss": 0.4813,
"step": 11330
},
{
"epoch": 1.85,
"learning_rate": 7.90489008524002e-06,
"loss": 0.4799,
"step": 11340
},
{
"epoch": 1.85,
"learning_rate": 7.893674293405114e-06,
"loss": 0.4856,
"step": 11350
},
{
"epoch": 1.85,
"learning_rate": 7.882458501570212e-06,
"loss": 0.4823,
"step": 11360
},
{
"epoch": 1.86,
"learning_rate": 7.871242709735308e-06,
"loss": 0.4806,
"step": 11370
},
{
"epoch": 1.86,
"learning_rate": 7.860026917900405e-06,
"loss": 0.4802,
"step": 11380
},
{
"epoch": 1.86,
"learning_rate": 7.848811126065501e-06,
"loss": 0.489,
"step": 11390
},
{
"epoch": 1.86,
"learning_rate": 7.837595334230597e-06,
"loss": 0.4811,
"step": 11400
},
{
"epoch": 1.86,
"learning_rate": 7.826379542395694e-06,
"loss": 0.4777,
"step": 11410
},
{
"epoch": 1.86,
"learning_rate": 7.81516375056079e-06,
"loss": 0.4803,
"step": 11420
},
{
"epoch": 1.87,
"learning_rate": 7.803947958725887e-06,
"loss": 0.4757,
"step": 11430
},
{
"epoch": 1.87,
"learning_rate": 7.792732166890983e-06,
"loss": 0.4696,
"step": 11440
},
{
"epoch": 1.87,
"learning_rate": 7.78151637505608e-06,
"loss": 0.4754,
"step": 11450
},
{
"epoch": 1.87,
"learning_rate": 7.770300583221176e-06,
"loss": 0.4764,
"step": 11460
},
{
"epoch": 1.87,
"learning_rate": 7.759084791386272e-06,
"loss": 0.4717,
"step": 11470
},
{
"epoch": 1.87,
"learning_rate": 7.747868999551368e-06,
"loss": 0.4794,
"step": 11480
},
{
"epoch": 1.88,
"learning_rate": 7.736653207716466e-06,
"loss": 0.4758,
"step": 11490
},
{
"epoch": 1.88,
"learning_rate": 7.725437415881561e-06,
"loss": 0.4767,
"step": 11500
},
{
"epoch": 1.88,
"learning_rate": 7.714221624046659e-06,
"loss": 0.4745,
"step": 11510
},
{
"epoch": 1.88,
"learning_rate": 7.703005832211755e-06,
"loss": 0.475,
"step": 11520
},
{
"epoch": 1.88,
"learning_rate": 7.69179004037685e-06,
"loss": 0.4748,
"step": 11530
},
{
"epoch": 1.88,
"learning_rate": 7.680574248541948e-06,
"loss": 0.4808,
"step": 11540
},
{
"epoch": 1.88,
"learning_rate": 7.669358456707045e-06,
"loss": 0.4655,
"step": 11550
},
{
"epoch": 1.89,
"learning_rate": 7.658142664872141e-06,
"loss": 0.4846,
"step": 11560
},
{
"epoch": 1.89,
"learning_rate": 7.646926873037237e-06,
"loss": 0.4688,
"step": 11570
},
{
"epoch": 1.89,
"learning_rate": 7.635711081202334e-06,
"loss": 0.4665,
"step": 11580
},
{
"epoch": 1.89,
"learning_rate": 7.62449528936743e-06,
"loss": 0.4782,
"step": 11590
},
{
"epoch": 1.89,
"learning_rate": 7.613279497532526e-06,
"loss": 0.4785,
"step": 11600
},
{
"epoch": 1.89,
"learning_rate": 7.6020637056976235e-06,
"loss": 0.4821,
"step": 11610
},
{
"epoch": 1.9,
"learning_rate": 7.590847913862719e-06,
"loss": 0.4835,
"step": 11620
},
{
"epoch": 1.9,
"learning_rate": 7.579632122027815e-06,
"loss": 0.4875,
"step": 11630
},
{
"epoch": 1.9,
"learning_rate": 7.568416330192913e-06,
"loss": 0.4837,
"step": 11640
},
{
"epoch": 1.9,
"learning_rate": 7.557200538358008e-06,
"loss": 0.4819,
"step": 11650
},
{
"epoch": 1.9,
"learning_rate": 7.545984746523105e-06,
"loss": 0.4704,
"step": 11660
},
{
"epoch": 1.9,
"learning_rate": 7.534768954688202e-06,
"loss": 0.477,
"step": 11670
},
{
"epoch": 1.91,
"learning_rate": 7.523553162853298e-06,
"loss": 0.4685,
"step": 11680
},
{
"epoch": 1.91,
"learning_rate": 7.512337371018394e-06,
"loss": 0.4822,
"step": 11690
},
{
"epoch": 1.91,
"learning_rate": 7.5011215791834916e-06,
"loss": 0.4747,
"step": 11700
},
{
"epoch": 1.91,
"learning_rate": 7.489905787348587e-06,
"loss": 0.4768,
"step": 11710
},
{
"epoch": 1.91,
"learning_rate": 7.4786899955136834e-06,
"loss": 0.4722,
"step": 11720
},
{
"epoch": 1.91,
"learning_rate": 7.467474203678781e-06,
"loss": 0.4909,
"step": 11730
},
{
"epoch": 1.92,
"learning_rate": 7.456258411843876e-06,
"loss": 0.4749,
"step": 11740
},
{
"epoch": 1.92,
"learning_rate": 7.445042620008973e-06,
"loss": 0.4766,
"step": 11750
},
{
"epoch": 1.92,
"learning_rate": 7.43382682817407e-06,
"loss": 0.4767,
"step": 11760
},
{
"epoch": 1.92,
"learning_rate": 7.422611036339166e-06,
"loss": 0.4737,
"step": 11770
},
{
"epoch": 1.92,
"learning_rate": 7.4113952445042624e-06,
"loss": 0.4827,
"step": 11780
},
{
"epoch": 1.92,
"learning_rate": 7.40017945266936e-06,
"loss": 0.4819,
"step": 11790
},
{
"epoch": 1.93,
"learning_rate": 7.388963660834455e-06,
"loss": 0.4827,
"step": 11800
},
{
"epoch": 1.93,
"learning_rate": 7.3777478689995515e-06,
"loss": 0.4795,
"step": 11810
},
{
"epoch": 1.93,
"learning_rate": 7.366532077164649e-06,
"loss": 0.4636,
"step": 11820
},
{
"epoch": 1.93,
"learning_rate": 7.355316285329744e-06,
"loss": 0.4824,
"step": 11830
},
{
"epoch": 1.93,
"learning_rate": 7.344100493494841e-06,
"loss": 0.4732,
"step": 11840
},
{
"epoch": 1.93,
"learning_rate": 7.332884701659938e-06,
"loss": 0.4798,
"step": 11850
},
{
"epoch": 1.94,
"learning_rate": 7.321668909825034e-06,
"loss": 0.4813,
"step": 11860
},
{
"epoch": 1.94,
"learning_rate": 7.3104531179901305e-06,
"loss": 0.4709,
"step": 11870
},
{
"epoch": 1.94,
"learning_rate": 7.299237326155228e-06,
"loss": 0.4833,
"step": 11880
},
{
"epoch": 1.94,
"learning_rate": 7.288021534320323e-06,
"loss": 0.4772,
"step": 11890
},
{
"epoch": 1.94,
"learning_rate": 7.2768057424854196e-06,
"loss": 0.4654,
"step": 11900
},
{
"epoch": 1.94,
"learning_rate": 7.265589950650517e-06,
"loss": 0.4779,
"step": 11910
},
{
"epoch": 1.95,
"learning_rate": 7.254374158815612e-06,
"loss": 0.4738,
"step": 11920
},
{
"epoch": 1.95,
"learning_rate": 7.2431583669807095e-06,
"loss": 0.4783,
"step": 11930
},
{
"epoch": 1.95,
"learning_rate": 7.231942575145806e-06,
"loss": 0.4798,
"step": 11940
},
{
"epoch": 1.95,
"learning_rate": 7.220726783310902e-06,
"loss": 0.4845,
"step": 11950
},
{
"epoch": 1.95,
"learning_rate": 7.2095109914759986e-06,
"loss": 0.4701,
"step": 11960
},
{
"epoch": 1.95,
"learning_rate": 7.198295199641096e-06,
"loss": 0.4759,
"step": 11970
},
{
"epoch": 1.95,
"learning_rate": 7.187079407806191e-06,
"loss": 0.4781,
"step": 11980
},
{
"epoch": 1.96,
"learning_rate": 7.175863615971288e-06,
"loss": 0.4689,
"step": 11990
},
{
"epoch": 1.96,
"learning_rate": 7.164647824136385e-06,
"loss": 0.4796,
"step": 12000
},
{
"epoch": 1.96,
"learning_rate": 7.15343203230148e-06,
"loss": 0.4801,
"step": 12010
},
{
"epoch": 1.96,
"learning_rate": 7.1422162404665775e-06,
"loss": 0.4685,
"step": 12020
},
{
"epoch": 1.96,
"learning_rate": 7.131000448631674e-06,
"loss": 0.4834,
"step": 12030
},
{
"epoch": 1.96,
"learning_rate": 7.11978465679677e-06,
"loss": 0.474,
"step": 12040
},
{
"epoch": 1.97,
"learning_rate": 7.108568864961867e-06,
"loss": 0.4703,
"step": 12050
},
{
"epoch": 1.97,
"learning_rate": 7.097353073126964e-06,
"loss": 0.4895,
"step": 12060
},
{
"epoch": 1.97,
"learning_rate": 7.086137281292059e-06,
"loss": 0.4765,
"step": 12070
},
{
"epoch": 1.97,
"learning_rate": 7.074921489457156e-06,
"loss": 0.4661,
"step": 12080
},
{
"epoch": 1.97,
"learning_rate": 7.063705697622253e-06,
"loss": 0.4753,
"step": 12090
},
{
"epoch": 1.97,
"learning_rate": 7.05248990578735e-06,
"loss": 0.4835,
"step": 12100
},
{
"epoch": 1.98,
"learning_rate": 7.041274113952446e-06,
"loss": 0.4747,
"step": 12110
},
{
"epoch": 1.98,
"learning_rate": 7.030058322117542e-06,
"loss": 0.4751,
"step": 12120
},
{
"epoch": 1.98,
"learning_rate": 7.018842530282639e-06,
"loss": 0.4702,
"step": 12130
},
{
"epoch": 1.98,
"learning_rate": 7.007626738447735e-06,
"loss": 0.4744,
"step": 12140
},
{
"epoch": 1.98,
"learning_rate": 6.996410946612832e-06,
"loss": 0.4729,
"step": 12150
},
{
"epoch": 1.98,
"learning_rate": 6.985195154777928e-06,
"loss": 0.4689,
"step": 12160
},
{
"epoch": 1.99,
"learning_rate": 6.973979362943024e-06,
"loss": 0.4664,
"step": 12170
},
{
"epoch": 1.99,
"learning_rate": 6.962763571108121e-06,
"loss": 0.4741,
"step": 12180
},
{
"epoch": 1.99,
"learning_rate": 6.951547779273218e-06,
"loss": 0.4676,
"step": 12190
},
{
"epoch": 1.99,
"learning_rate": 6.940331987438314e-06,
"loss": 0.4841,
"step": 12200
},
{
"epoch": 1.99,
"learning_rate": 6.92911619560341e-06,
"loss": 0.4848,
"step": 12210
},
{
"epoch": 1.99,
"learning_rate": 6.917900403768507e-06,
"loss": 0.4792,
"step": 12220
},
{
"epoch": 2.0,
"learning_rate": 6.906684611933603e-06,
"loss": 0.4863,
"step": 12230
},
{
"epoch": 2.0,
"learning_rate": 6.8954688200987e-06,
"loss": 0.4789,
"step": 12240
},
{
"epoch": 2.0,
"learning_rate": 6.884253028263796e-06,
"loss": 0.4726,
"step": 12250
},
{
"epoch": 2.0,
"learning_rate": 6.873037236428892e-06,
"loss": 0.4762,
"step": 12260
},
{
"epoch": 2.0,
"learning_rate": 6.861821444593989e-06,
"loss": 0.4746,
"step": 12270
},
{
"epoch": 2.0,
"learning_rate": 6.850605652759086e-06,
"loss": 0.4637,
"step": 12280
},
{
"epoch": 2.01,
"learning_rate": 6.839389860924182e-06,
"loss": 0.4619,
"step": 12290
},
{
"epoch": 2.01,
"learning_rate": 6.828174069089278e-06,
"loss": 0.4725,
"step": 12300
},
{
"epoch": 2.01,
"learning_rate": 6.816958277254375e-06,
"loss": 0.4737,
"step": 12310
},
{
"epoch": 2.01,
"learning_rate": 6.805742485419471e-06,
"loss": 0.4792,
"step": 12320
},
{
"epoch": 2.01,
"learning_rate": 6.794526693584568e-06,
"loss": 0.4797,
"step": 12330
},
{
"epoch": 2.01,
"learning_rate": 6.783310901749664e-06,
"loss": 0.4707,
"step": 12340
},
{
"epoch": 2.02,
"learning_rate": 6.77209510991476e-06,
"loss": 0.4752,
"step": 12350
},
{
"epoch": 2.02,
"learning_rate": 6.760879318079857e-06,
"loss": 0.4748,
"step": 12360
},
{
"epoch": 2.02,
"learning_rate": 6.749663526244954e-06,
"loss": 0.471,
"step": 12370
},
{
"epoch": 2.02,
"learning_rate": 6.73844773441005e-06,
"loss": 0.475,
"step": 12380
},
{
"epoch": 2.02,
"learning_rate": 6.727231942575146e-06,
"loss": 0.4686,
"step": 12390
},
{
"epoch": 2.02,
"learning_rate": 6.716016150740243e-06,
"loss": 0.4706,
"step": 12400
},
{
"epoch": 2.03,
"learning_rate": 6.704800358905339e-06,
"loss": 0.4615,
"step": 12410
},
{
"epoch": 2.03,
"learning_rate": 6.693584567070436e-06,
"loss": 0.4762,
"step": 12420
},
{
"epoch": 2.03,
"learning_rate": 6.6823687752355324e-06,
"loss": 0.4815,
"step": 12430
},
{
"epoch": 2.03,
"learning_rate": 6.671152983400628e-06,
"loss": 0.477,
"step": 12440
},
{
"epoch": 2.03,
"learning_rate": 6.659937191565725e-06,
"loss": 0.4645,
"step": 12450
},
{
"epoch": 2.03,
"learning_rate": 6.648721399730822e-06,
"loss": 0.4692,
"step": 12460
},
{
"epoch": 2.03,
"learning_rate": 6.637505607895918e-06,
"loss": 0.4739,
"step": 12470
},
{
"epoch": 2.04,
"learning_rate": 6.626289816061014e-06,
"loss": 0.4704,
"step": 12480
},
{
"epoch": 2.04,
"learning_rate": 6.615074024226111e-06,
"loss": 0.4788,
"step": 12490
},
{
"epoch": 2.04,
"learning_rate": 6.603858232391207e-06,
"loss": 0.4717,
"step": 12500
},
{
"epoch": 2.04,
"learning_rate": 6.592642440556304e-06,
"loss": 0.4595,
"step": 12510
},
{
"epoch": 2.04,
"learning_rate": 6.5814266487214005e-06,
"loss": 0.4764,
"step": 12520
},
{
"epoch": 2.04,
"learning_rate": 6.570210856886496e-06,
"loss": 0.4757,
"step": 12530
},
{
"epoch": 2.05,
"learning_rate": 6.558995065051593e-06,
"loss": 0.4757,
"step": 12540
},
{
"epoch": 2.05,
"learning_rate": 6.54777927321669e-06,
"loss": 0.474,
"step": 12550
},
{
"epoch": 2.05,
"learning_rate": 6.536563481381786e-06,
"loss": 0.4786,
"step": 12560
},
{
"epoch": 2.05,
"learning_rate": 6.525347689546882e-06,
"loss": 0.4662,
"step": 12570
},
{
"epoch": 2.05,
"learning_rate": 6.5141318977119795e-06,
"loss": 0.4684,
"step": 12580
},
{
"epoch": 2.05,
"learning_rate": 6.502916105877075e-06,
"loss": 0.4727,
"step": 12590
},
{
"epoch": 2.06,
"learning_rate": 6.491700314042172e-06,
"loss": 0.4913,
"step": 12600
},
{
"epoch": 2.06,
"learning_rate": 6.4804845222072685e-06,
"loss": 0.4618,
"step": 12610
},
{
"epoch": 2.06,
"learning_rate": 6.469268730372364e-06,
"loss": 0.4695,
"step": 12620
},
{
"epoch": 2.06,
"learning_rate": 6.458052938537461e-06,
"loss": 0.4704,
"step": 12630
},
{
"epoch": 2.06,
"learning_rate": 6.4468371467025585e-06,
"loss": 0.4812,
"step": 12640
},
{
"epoch": 2.06,
"learning_rate": 6.435621354867654e-06,
"loss": 0.4707,
"step": 12650
},
{
"epoch": 2.07,
"learning_rate": 6.42440556303275e-06,
"loss": 0.4845,
"step": 12660
},
{
"epoch": 2.07,
"learning_rate": 6.4131897711978475e-06,
"loss": 0.469,
"step": 12670
},
{
"epoch": 2.07,
"learning_rate": 6.401973979362943e-06,
"loss": 0.4649,
"step": 12680
},
{
"epoch": 2.07,
"learning_rate": 6.39075818752804e-06,
"loss": 0.4684,
"step": 12690
},
{
"epoch": 2.07,
"learning_rate": 6.379542395693137e-06,
"loss": 0.4688,
"step": 12700
},
{
"epoch": 2.07,
"learning_rate": 6.368326603858232e-06,
"loss": 0.4748,
"step": 12710
},
{
"epoch": 2.08,
"learning_rate": 6.357110812023329e-06,
"loss": 0.4631,
"step": 12720
},
{
"epoch": 2.08,
"learning_rate": 6.3458950201884265e-06,
"loss": 0.4814,
"step": 12730
},
{
"epoch": 2.08,
"learning_rate": 6.334679228353522e-06,
"loss": 0.4598,
"step": 12740
},
{
"epoch": 2.08,
"learning_rate": 6.323463436518618e-06,
"loss": 0.4807,
"step": 12750
},
{
"epoch": 2.08,
"learning_rate": 6.312247644683716e-06,
"loss": 0.4775,
"step": 12760
},
{
"epoch": 2.08,
"learning_rate": 6.301031852848811e-06,
"loss": 0.464,
"step": 12770
},
{
"epoch": 2.09,
"learning_rate": 6.289816061013908e-06,
"loss": 0.4724,
"step": 12780
},
{
"epoch": 2.09,
"learning_rate": 6.278600269179005e-06,
"loss": 0.4737,
"step": 12790
},
{
"epoch": 2.09,
"learning_rate": 6.2673844773441e-06,
"loss": 0.4737,
"step": 12800
},
{
"epoch": 2.09,
"learning_rate": 6.256168685509197e-06,
"loss": 0.4748,
"step": 12810
},
{
"epoch": 2.09,
"learning_rate": 6.244952893674295e-06,
"loss": 0.4736,
"step": 12820
},
{
"epoch": 2.09,
"learning_rate": 6.23373710183939e-06,
"loss": 0.483,
"step": 12830
},
{
"epoch": 2.1,
"learning_rate": 6.2225213100044865e-06,
"loss": 0.4687,
"step": 12840
},
{
"epoch": 2.1,
"learning_rate": 6.211305518169584e-06,
"loss": 0.4716,
"step": 12850
},
{
"epoch": 2.1,
"learning_rate": 6.200089726334679e-06,
"loss": 0.4723,
"step": 12860
},
{
"epoch": 2.1,
"learning_rate": 6.188873934499776e-06,
"loss": 0.4714,
"step": 12870
},
{
"epoch": 2.1,
"learning_rate": 6.177658142664873e-06,
"loss": 0.4782,
"step": 12880
},
{
"epoch": 2.1,
"learning_rate": 6.166442350829969e-06,
"loss": 0.4804,
"step": 12890
},
{
"epoch": 2.11,
"learning_rate": 6.1552265589950654e-06,
"loss": 0.4793,
"step": 12900
},
{
"epoch": 2.11,
"learning_rate": 6.144010767160163e-06,
"loss": 0.4639,
"step": 12910
},
{
"epoch": 2.11,
"learning_rate": 6.132794975325258e-06,
"loss": 0.4628,
"step": 12920
},
{
"epoch": 2.11,
"learning_rate": 6.1215791834903545e-06,
"loss": 0.4746,
"step": 12930
},
{
"epoch": 2.11,
"learning_rate": 6.110363391655452e-06,
"loss": 0.4625,
"step": 12940
},
{
"epoch": 2.11,
"learning_rate": 6.099147599820547e-06,
"loss": 0.4649,
"step": 12950
},
{
"epoch": 2.11,
"learning_rate": 6.0879318079856444e-06,
"loss": 0.4696,
"step": 12960
},
{
"epoch": 2.12,
"learning_rate": 6.076716016150741e-06,
"loss": 0.4631,
"step": 12970
},
{
"epoch": 2.12,
"learning_rate": 6.065500224315837e-06,
"loss": 0.4634,
"step": 12980
},
{
"epoch": 2.12,
"learning_rate": 6.0542844324809335e-06,
"loss": 0.4702,
"step": 12990
},
{
"epoch": 2.12,
"learning_rate": 6.043068640646031e-06,
"loss": 0.4643,
"step": 13000
},
{
"epoch": 2.12,
"learning_rate": 6.031852848811126e-06,
"loss": 0.4724,
"step": 13010
},
{
"epoch": 2.12,
"learning_rate": 6.020637056976223e-06,
"loss": 0.4738,
"step": 13020
},
{
"epoch": 2.13,
"learning_rate": 6.00942126514132e-06,
"loss": 0.4656,
"step": 13030
},
{
"epoch": 2.13,
"learning_rate": 5.998205473306415e-06,
"loss": 0.4733,
"step": 13040
},
{
"epoch": 2.13,
"learning_rate": 5.9869896814715125e-06,
"loss": 0.4786,
"step": 13050
},
{
"epoch": 2.13,
"learning_rate": 5.975773889636609e-06,
"loss": 0.4657,
"step": 13060
},
{
"epoch": 2.13,
"learning_rate": 5.964558097801705e-06,
"loss": 0.4794,
"step": 13070
},
{
"epoch": 2.13,
"learning_rate": 5.9533423059668016e-06,
"loss": 0.4775,
"step": 13080
},
{
"epoch": 2.14,
"learning_rate": 5.942126514131899e-06,
"loss": 0.4779,
"step": 13090
},
{
"epoch": 2.14,
"learning_rate": 5.930910722296994e-06,
"loss": 0.4712,
"step": 13100
},
{
"epoch": 2.14,
"learning_rate": 5.919694930462091e-06,
"loss": 0.467,
"step": 13110
},
{
"epoch": 2.14,
"learning_rate": 5.908479138627188e-06,
"loss": 0.4607,
"step": 13120
},
{
"epoch": 2.14,
"learning_rate": 5.897263346792283e-06,
"loss": 0.4673,
"step": 13130
},
{
"epoch": 2.14,
"learning_rate": 5.8860475549573806e-06,
"loss": 0.459,
"step": 13140
},
{
"epoch": 2.15,
"learning_rate": 5.874831763122477e-06,
"loss": 0.4771,
"step": 13150
},
{
"epoch": 2.15,
"learning_rate": 5.863615971287573e-06,
"loss": 0.4767,
"step": 13160
},
{
"epoch": 2.15,
"learning_rate": 5.85240017945267e-06,
"loss": 0.464,
"step": 13170
},
{
"epoch": 2.15,
"learning_rate": 5.841184387617767e-06,
"loss": 0.4724,
"step": 13180
},
{
"epoch": 2.15,
"learning_rate": 5.829968595782862e-06,
"loss": 0.464,
"step": 13190
},
{
"epoch": 2.15,
"learning_rate": 5.818752803947959e-06,
"loss": 0.4793,
"step": 13200
},
{
"epoch": 2.16,
"learning_rate": 5.807537012113056e-06,
"loss": 0.4777,
"step": 13210
},
{
"epoch": 2.16,
"learning_rate": 5.796321220278151e-06,
"loss": 0.4756,
"step": 13220
},
{
"epoch": 2.16,
"learning_rate": 5.785105428443249e-06,
"loss": 0.4695,
"step": 13230
},
{
"epoch": 2.16,
"learning_rate": 5.773889636608345e-06,
"loss": 0.4699,
"step": 13240
},
{
"epoch": 2.16,
"learning_rate": 5.762673844773441e-06,
"loss": 0.4676,
"step": 13250
},
{
"epoch": 2.16,
"learning_rate": 5.751458052938538e-06,
"loss": 0.4714,
"step": 13260
},
{
"epoch": 2.17,
"learning_rate": 5.740242261103635e-06,
"loss": 0.475,
"step": 13270
},
{
"epoch": 2.17,
"learning_rate": 5.72902646926873e-06,
"loss": 0.4803,
"step": 13280
},
{
"epoch": 2.17,
"learning_rate": 5.717810677433827e-06,
"loss": 0.4757,
"step": 13290
},
{
"epoch": 2.17,
"learning_rate": 5.706594885598924e-06,
"loss": 0.4742,
"step": 13300
},
{
"epoch": 2.17,
"learning_rate": 5.6953790937640195e-06,
"loss": 0.4717,
"step": 13310
},
{
"epoch": 2.17,
"learning_rate": 5.684163301929117e-06,
"loss": 0.4766,
"step": 13320
},
{
"epoch": 2.18,
"learning_rate": 5.672947510094213e-06,
"loss": 0.4793,
"step": 13330
},
{
"epoch": 2.18,
"learning_rate": 5.66173171825931e-06,
"loss": 0.4717,
"step": 13340
},
{
"epoch": 2.18,
"learning_rate": 5.650515926424406e-06,
"loss": 0.4591,
"step": 13350
},
{
"epoch": 2.18,
"learning_rate": 5.639300134589503e-06,
"loss": 0.463,
"step": 13360
},
{
"epoch": 2.18,
"learning_rate": 5.628084342754599e-06,
"loss": 0.4669,
"step": 13370
},
{
"epoch": 2.18,
"learning_rate": 5.616868550919695e-06,
"loss": 0.4773,
"step": 13380
},
{
"epoch": 2.19,
"learning_rate": 5.605652759084792e-06,
"loss": 0.4642,
"step": 13390
},
{
"epoch": 2.19,
"learning_rate": 5.594436967249889e-06,
"loss": 0.4689,
"step": 13400
},
{
"epoch": 2.19,
"learning_rate": 5.583221175414985e-06,
"loss": 0.4697,
"step": 13410
},
{
"epoch": 2.19,
"learning_rate": 5.572005383580081e-06,
"loss": 0.4627,
"step": 13420
},
{
"epoch": 2.19,
"learning_rate": 5.560789591745178e-06,
"loss": 0.4595,
"step": 13430
},
{
"epoch": 2.19,
"learning_rate": 5.549573799910274e-06,
"loss": 0.4661,
"step": 13440
},
{
"epoch": 2.19,
"learning_rate": 5.538358008075371e-06,
"loss": 0.4616,
"step": 13450
},
{
"epoch": 2.2,
"learning_rate": 5.527142216240467e-06,
"loss": 0.4599,
"step": 13460
},
{
"epoch": 2.2,
"learning_rate": 5.515926424405563e-06,
"loss": 0.4659,
"step": 13470
},
{
"epoch": 2.2,
"learning_rate": 5.50471063257066e-06,
"loss": 0.4645,
"step": 13480
},
{
"epoch": 2.2,
"learning_rate": 5.493494840735757e-06,
"loss": 0.4646,
"step": 13490
},
{
"epoch": 2.2,
"learning_rate": 5.482279048900853e-06,
"loss": 0.4703,
"step": 13500
},
{
"epoch": 2.2,
"learning_rate": 5.471063257065949e-06,
"loss": 0.4717,
"step": 13510
},
{
"epoch": 2.21,
"learning_rate": 5.459847465231046e-06,
"loss": 0.4822,
"step": 13520
},
{
"epoch": 2.21,
"learning_rate": 5.448631673396142e-06,
"loss": 0.4635,
"step": 13530
},
{
"epoch": 2.21,
"learning_rate": 5.437415881561239e-06,
"loss": 0.4709,
"step": 13540
},
{
"epoch": 2.21,
"learning_rate": 5.4262000897263354e-06,
"loss": 0.4734,
"step": 13550
},
{
"epoch": 2.21,
"learning_rate": 5.414984297891431e-06,
"loss": 0.4664,
"step": 13560
},
{
"epoch": 2.21,
"learning_rate": 5.403768506056528e-06,
"loss": 0.472,
"step": 13570
},
{
"epoch": 2.22,
"learning_rate": 5.392552714221625e-06,
"loss": 0.4685,
"step": 13580
},
{
"epoch": 2.22,
"learning_rate": 5.381336922386721e-06,
"loss": 0.4605,
"step": 13590
},
{
"epoch": 2.22,
"learning_rate": 5.370121130551817e-06,
"loss": 0.4594,
"step": 13600
},
{
"epoch": 2.22,
"learning_rate": 5.3589053387169144e-06,
"loss": 0.4852,
"step": 13610
},
{
"epoch": 2.22,
"learning_rate": 5.34768954688201e-06,
"loss": 0.4696,
"step": 13620
},
{
"epoch": 2.22,
"learning_rate": 5.336473755047107e-06,
"loss": 0.4669,
"step": 13630
},
{
"epoch": 2.23,
"learning_rate": 5.3252579632122035e-06,
"loss": 0.4792,
"step": 13640
},
{
"epoch": 2.23,
"learning_rate": 5.314042171377299e-06,
"loss": 0.461,
"step": 13650
},
{
"epoch": 2.23,
"learning_rate": 5.302826379542396e-06,
"loss": 0.4772,
"step": 13660
},
{
"epoch": 2.23,
"learning_rate": 5.2916105877074934e-06,
"loss": 0.4671,
"step": 13670
},
{
"epoch": 2.23,
"learning_rate": 5.280394795872589e-06,
"loss": 0.4792,
"step": 13680
},
{
"epoch": 2.23,
"learning_rate": 5.269179004037685e-06,
"loss": 0.4604,
"step": 13690
},
{
"epoch": 2.24,
"learning_rate": 5.2579632122027825e-06,
"loss": 0.4778,
"step": 13700
},
{
"epoch": 2.24,
"learning_rate": 5.246747420367878e-06,
"loss": 0.4726,
"step": 13710
},
{
"epoch": 2.24,
"learning_rate": 5.235531628532975e-06,
"loss": 0.4681,
"step": 13720
},
{
"epoch": 2.24,
"learning_rate": 5.2243158366980716e-06,
"loss": 0.4716,
"step": 13730
},
{
"epoch": 2.24,
"learning_rate": 5.213100044863167e-06,
"loss": 0.4725,
"step": 13740
},
{
"epoch": 2.24,
"learning_rate": 5.201884253028264e-06,
"loss": 0.4678,
"step": 13750
},
{
"epoch": 2.25,
"learning_rate": 5.1906684611933615e-06,
"loss": 0.4672,
"step": 13760
},
{
"epoch": 2.25,
"learning_rate": 5.179452669358457e-06,
"loss": 0.4768,
"step": 13770
},
{
"epoch": 2.25,
"learning_rate": 5.168236877523553e-06,
"loss": 0.4675,
"step": 13780
},
{
"epoch": 2.25,
"learning_rate": 5.1570210856886506e-06,
"loss": 0.4757,
"step": 13790
},
{
"epoch": 2.25,
"learning_rate": 5.145805293853746e-06,
"loss": 0.472,
"step": 13800
},
{
"epoch": 2.25,
"learning_rate": 5.134589502018843e-06,
"loss": 0.4828,
"step": 13810
},
{
"epoch": 2.26,
"learning_rate": 5.12337371018394e-06,
"loss": 0.4626,
"step": 13820
},
{
"epoch": 2.26,
"learning_rate": 5.112157918349035e-06,
"loss": 0.4668,
"step": 13830
},
{
"epoch": 2.26,
"learning_rate": 5.100942126514132e-06,
"loss": 0.4738,
"step": 13840
},
{
"epoch": 2.26,
"learning_rate": 5.0897263346792296e-06,
"loss": 0.4797,
"step": 13850
},
{
"epoch": 2.26,
"learning_rate": 5.078510542844325e-06,
"loss": 0.4635,
"step": 13860
},
{
"epoch": 2.26,
"learning_rate": 5.067294751009421e-06,
"loss": 0.482,
"step": 13870
},
{
"epoch": 2.27,
"learning_rate": 5.056078959174519e-06,
"loss": 0.4805,
"step": 13880
},
{
"epoch": 2.27,
"learning_rate": 5.044863167339614e-06,
"loss": 0.4657,
"step": 13890
},
{
"epoch": 2.27,
"learning_rate": 5.033647375504711e-06,
"loss": 0.4603,
"step": 13900
},
{
"epoch": 2.27,
"learning_rate": 5.022431583669808e-06,
"loss": 0.46,
"step": 13910
},
{
"epoch": 2.27,
"learning_rate": 5.011215791834903e-06,
"loss": 0.4665,
"step": 13920
},
{
"epoch": 2.27,
"learning_rate": 5e-06,
"loss": 0.467,
"step": 13930
},
{
"epoch": 2.27,
"learning_rate": 4.988784208165097e-06,
"loss": 0.4726,
"step": 13940
},
{
"epoch": 2.28,
"learning_rate": 4.977568416330193e-06,
"loss": 0.4608,
"step": 13950
},
{
"epoch": 2.28,
"learning_rate": 4.9663526244952895e-06,
"loss": 0.4735,
"step": 13960
},
{
"epoch": 2.28,
"learning_rate": 4.955136832660386e-06,
"loss": 0.4627,
"step": 13970
},
{
"epoch": 2.28,
"learning_rate": 4.943921040825483e-06,
"loss": 0.4797,
"step": 13980
},
{
"epoch": 2.28,
"learning_rate": 4.932705248990579e-06,
"loss": 0.4708,
"step": 13990
},
{
"epoch": 2.28,
"learning_rate": 4.921489457155676e-06,
"loss": 0.4673,
"step": 14000
},
{
"epoch": 2.29,
"learning_rate": 4.910273665320772e-06,
"loss": 0.4703,
"step": 14010
},
{
"epoch": 2.29,
"learning_rate": 4.8990578734858685e-06,
"loss": 0.4644,
"step": 14020
},
{
"epoch": 2.29,
"learning_rate": 4.887842081650965e-06,
"loss": 0.4712,
"step": 14030
},
{
"epoch": 2.29,
"learning_rate": 4.876626289816061e-06,
"loss": 0.4643,
"step": 14040
},
{
"epoch": 2.29,
"learning_rate": 4.8654104979811575e-06,
"loss": 0.4795,
"step": 14050
},
{
"epoch": 2.29,
"learning_rate": 4.854194706146254e-06,
"loss": 0.4697,
"step": 14060
},
{
"epoch": 2.3,
"learning_rate": 4.842978914311351e-06,
"loss": 0.4651,
"step": 14070
},
{
"epoch": 2.3,
"learning_rate": 4.8317631224764475e-06,
"loss": 0.4607,
"step": 14080
},
{
"epoch": 2.3,
"learning_rate": 4.820547330641544e-06,
"loss": 0.4746,
"step": 14090
},
{
"epoch": 2.3,
"learning_rate": 4.80933153880664e-06,
"loss": 0.4804,
"step": 14100
},
{
"epoch": 2.3,
"learning_rate": 4.7981157469717365e-06,
"loss": 0.4538,
"step": 14110
},
{
"epoch": 2.3,
"learning_rate": 4.786899955136833e-06,
"loss": 0.4744,
"step": 14120
},
{
"epoch": 2.31,
"learning_rate": 4.775684163301929e-06,
"loss": 0.4747,
"step": 14130
},
{
"epoch": 2.31,
"learning_rate": 4.764468371467026e-06,
"loss": 0.4603,
"step": 14140
},
{
"epoch": 2.31,
"learning_rate": 4.753252579632122e-06,
"loss": 0.4742,
"step": 14150
},
{
"epoch": 2.31,
"learning_rate": 4.742036787797219e-06,
"loss": 0.4678,
"step": 14160
},
{
"epoch": 2.31,
"learning_rate": 4.7308209959623155e-06,
"loss": 0.4707,
"step": 14170
},
{
"epoch": 2.31,
"learning_rate": 4.719605204127412e-06,
"loss": 0.4724,
"step": 14180
},
{
"epoch": 2.32,
"learning_rate": 4.708389412292508e-06,
"loss": 0.4657,
"step": 14190
},
{
"epoch": 2.32,
"learning_rate": 4.697173620457605e-06,
"loss": 0.4748,
"step": 14200
},
{
"epoch": 2.32,
"learning_rate": 4.685957828622701e-06,
"loss": 0.4663,
"step": 14210
},
{
"epoch": 2.32,
"learning_rate": 4.674742036787797e-06,
"loss": 0.4656,
"step": 14220
},
{
"epoch": 2.32,
"learning_rate": 4.663526244952894e-06,
"loss": 0.4611,
"step": 14230
},
{
"epoch": 2.32,
"learning_rate": 4.652310453117991e-06,
"loss": 0.4658,
"step": 14240
},
{
"epoch": 2.33,
"learning_rate": 4.641094661283087e-06,
"loss": 0.4741,
"step": 14250
},
{
"epoch": 2.33,
"learning_rate": 4.629878869448184e-06,
"loss": 0.4728,
"step": 14260
},
{
"epoch": 2.33,
"learning_rate": 4.61866307761328e-06,
"loss": 0.4627,
"step": 14270
},
{
"epoch": 2.33,
"learning_rate": 4.607447285778376e-06,
"loss": 0.4702,
"step": 14280
},
{
"epoch": 2.33,
"learning_rate": 4.596231493943473e-06,
"loss": 0.4744,
"step": 14290
},
{
"epoch": 2.33,
"learning_rate": 4.58501570210857e-06,
"loss": 0.4606,
"step": 14300
},
{
"epoch": 2.34,
"learning_rate": 4.573799910273665e-06,
"loss": 0.467,
"step": 14310
},
{
"epoch": 2.34,
"learning_rate": 4.562584118438762e-06,
"loss": 0.4593,
"step": 14320
},
{
"epoch": 2.34,
"learning_rate": 4.551368326603859e-06,
"loss": 0.4724,
"step": 14330
},
{
"epoch": 2.34,
"learning_rate": 4.540152534768955e-06,
"loss": 0.4659,
"step": 14340
},
{
"epoch": 2.34,
"learning_rate": 4.528936742934052e-06,
"loss": 0.4653,
"step": 14350
},
{
"epoch": 2.34,
"learning_rate": 4.517720951099148e-06,
"loss": 0.4571,
"step": 14360
},
{
"epoch": 2.34,
"learning_rate": 4.506505159264244e-06,
"loss": 0.4711,
"step": 14370
},
{
"epoch": 2.35,
"learning_rate": 4.495289367429341e-06,
"loss": 0.4657,
"step": 14380
},
{
"epoch": 2.35,
"learning_rate": 4.484073575594438e-06,
"loss": 0.4671,
"step": 14390
},
{
"epoch": 2.35,
"learning_rate": 4.4728577837595334e-06,
"loss": 0.4677,
"step": 14400
},
{
"epoch": 2.35,
"learning_rate": 4.46164199192463e-06,
"loss": 0.4751,
"step": 14410
},
{
"epoch": 2.35,
"learning_rate": 4.450426200089727e-06,
"loss": 0.4614,
"step": 14420
},
{
"epoch": 2.35,
"learning_rate": 4.439210408254823e-06,
"loss": 0.4771,
"step": 14430
},
{
"epoch": 2.36,
"learning_rate": 4.42799461641992e-06,
"loss": 0.4573,
"step": 14440
},
{
"epoch": 2.36,
"learning_rate": 4.416778824585016e-06,
"loss": 0.4673,
"step": 14450
},
{
"epoch": 2.36,
"learning_rate": 4.4055630327501124e-06,
"loss": 0.4746,
"step": 14460
},
{
"epoch": 2.36,
"learning_rate": 4.394347240915209e-06,
"loss": 0.4792,
"step": 14470
},
{
"epoch": 2.36,
"learning_rate": 4.383131449080306e-06,
"loss": 0.4728,
"step": 14480
},
{
"epoch": 2.36,
"learning_rate": 4.3719156572454015e-06,
"loss": 0.4674,
"step": 14490
},
{
"epoch": 2.37,
"learning_rate": 4.360699865410498e-06,
"loss": 0.4759,
"step": 14500
},
{
"epoch": 2.37,
"learning_rate": 4.349484073575595e-06,
"loss": 0.478,
"step": 14510
},
{
"epoch": 2.37,
"learning_rate": 4.338268281740691e-06,
"loss": 0.464,
"step": 14520
},
{
"epoch": 2.37,
"learning_rate": 4.327052489905788e-06,
"loss": 0.4801,
"step": 14530
},
{
"epoch": 2.37,
"learning_rate": 4.315836698070884e-06,
"loss": 0.4704,
"step": 14540
},
{
"epoch": 2.37,
"learning_rate": 4.3046209062359805e-06,
"loss": 0.4778,
"step": 14550
},
{
"epoch": 2.38,
"learning_rate": 4.293405114401077e-06,
"loss": 0.4783,
"step": 14560
},
{
"epoch": 2.38,
"learning_rate": 4.282189322566174e-06,
"loss": 0.4821,
"step": 14570
},
{
"epoch": 2.38,
"learning_rate": 4.2709735307312696e-06,
"loss": 0.4607,
"step": 14580
},
{
"epoch": 2.38,
"learning_rate": 4.259757738896366e-06,
"loss": 0.4752,
"step": 14590
},
{
"epoch": 2.38,
"learning_rate": 4.248541947061463e-06,
"loss": 0.4672,
"step": 14600
},
{
"epoch": 2.38,
"learning_rate": 4.2373261552265595e-06,
"loss": 0.466,
"step": 14610
},
{
"epoch": 2.39,
"learning_rate": 4.226110363391656e-06,
"loss": 0.4711,
"step": 14620
},
{
"epoch": 2.39,
"learning_rate": 4.214894571556752e-06,
"loss": 0.4813,
"step": 14630
},
{
"epoch": 2.39,
"learning_rate": 4.2036787797218485e-06,
"loss": 0.4731,
"step": 14640
},
{
"epoch": 2.39,
"learning_rate": 4.192462987886945e-06,
"loss": 0.4656,
"step": 14650
},
{
"epoch": 2.39,
"learning_rate": 4.181247196052042e-06,
"loss": 0.4605,
"step": 14660
},
{
"epoch": 2.39,
"learning_rate": 4.170031404217138e-06,
"loss": 0.4535,
"step": 14670
},
{
"epoch": 2.4,
"learning_rate": 4.158815612382234e-06,
"loss": 0.4601,
"step": 14680
},
{
"epoch": 2.4,
"learning_rate": 4.147599820547331e-06,
"loss": 0.4617,
"step": 14690
},
{
"epoch": 2.4,
"learning_rate": 4.1363840287124275e-06,
"loss": 0.4691,
"step": 14700
},
{
"epoch": 2.4,
"learning_rate": 4.125168236877524e-06,
"loss": 0.4707,
"step": 14710
},
{
"epoch": 2.4,
"learning_rate": 4.11395244504262e-06,
"loss": 0.4678,
"step": 14720
},
{
"epoch": 2.4,
"learning_rate": 4.102736653207717e-06,
"loss": 0.468,
"step": 14730
},
{
"epoch": 2.41,
"learning_rate": 4.091520861372813e-06,
"loss": 0.4675,
"step": 14740
},
{
"epoch": 2.41,
"learning_rate": 4.08030506953791e-06,
"loss": 0.4746,
"step": 14750
},
{
"epoch": 2.41,
"learning_rate": 4.069089277703006e-06,
"loss": 0.4729,
"step": 14760
},
{
"epoch": 2.41,
"learning_rate": 4.057873485868102e-06,
"loss": 0.4681,
"step": 14770
},
{
"epoch": 2.41,
"learning_rate": 4.046657694033199e-06,
"loss": 0.4726,
"step": 14780
},
{
"epoch": 2.41,
"learning_rate": 4.035441902198296e-06,
"loss": 0.472,
"step": 14790
},
{
"epoch": 2.42,
"learning_rate": 4.024226110363392e-06,
"loss": 0.4532,
"step": 14800
},
{
"epoch": 2.42,
"learning_rate": 4.013010318528488e-06,
"loss": 0.475,
"step": 14810
},
{
"epoch": 2.42,
"learning_rate": 4.001794526693585e-06,
"loss": 0.4659,
"step": 14820
},
{
"epoch": 2.42,
"learning_rate": 3.990578734858681e-06,
"loss": 0.4747,
"step": 14830
},
{
"epoch": 2.42,
"learning_rate": 3.979362943023778e-06,
"loss": 0.4697,
"step": 14840
},
{
"epoch": 2.42,
"learning_rate": 3.968147151188875e-06,
"loss": 0.4709,
"step": 14850
},
{
"epoch": 2.42,
"learning_rate": 3.956931359353971e-06,
"loss": 0.4738,
"step": 14860
},
{
"epoch": 2.43,
"learning_rate": 3.945715567519067e-06,
"loss": 0.4628,
"step": 14870
},
{
"epoch": 2.43,
"learning_rate": 3.934499775684164e-06,
"loss": 0.4695,
"step": 14880
},
{
"epoch": 2.43,
"learning_rate": 3.92328398384926e-06,
"loss": 0.4789,
"step": 14890
},
{
"epoch": 2.43,
"learning_rate": 3.912068192014356e-06,
"loss": 0.462,
"step": 14900
},
{
"epoch": 2.43,
"learning_rate": 3.900852400179453e-06,
"loss": 0.4543,
"step": 14910
},
{
"epoch": 2.43,
"learning_rate": 3.88963660834455e-06,
"loss": 0.4577,
"step": 14920
},
{
"epoch": 2.44,
"learning_rate": 3.878420816509646e-06,
"loss": 0.4619,
"step": 14930
},
{
"epoch": 2.44,
"learning_rate": 3.867205024674743e-06,
"loss": 0.4723,
"step": 14940
},
{
"epoch": 2.44,
"learning_rate": 3.855989232839839e-06,
"loss": 0.4687,
"step": 14950
},
{
"epoch": 2.44,
"learning_rate": 3.844773441004935e-06,
"loss": 0.4745,
"step": 14960
},
{
"epoch": 2.44,
"learning_rate": 3.833557649170032e-06,
"loss": 0.4795,
"step": 14970
},
{
"epoch": 2.44,
"learning_rate": 3.822341857335128e-06,
"loss": 0.4591,
"step": 14980
},
{
"epoch": 2.45,
"learning_rate": 3.811126065500225e-06,
"loss": 0.4607,
"step": 14990
},
{
"epoch": 2.45,
"learning_rate": 3.799910273665321e-06,
"loss": 0.4693,
"step": 15000
},
{
"epoch": 2.45,
"learning_rate": 3.7886944818304176e-06,
"loss": 0.4798,
"step": 15010
},
{
"epoch": 2.45,
"learning_rate": 3.777478689995514e-06,
"loss": 0.4655,
"step": 15020
},
{
"epoch": 2.45,
"learning_rate": 3.7662628981606103e-06,
"loss": 0.4543,
"step": 15030
},
{
"epoch": 2.45,
"learning_rate": 3.755047106325707e-06,
"loss": 0.4617,
"step": 15040
},
{
"epoch": 2.46,
"learning_rate": 3.7438313144908034e-06,
"loss": 0.4731,
"step": 15050
},
{
"epoch": 2.46,
"learning_rate": 3.7337371018393903e-06,
"loss": 0.4608,
"step": 15060
},
{
"epoch": 2.46,
"learning_rate": 3.7225213100044867e-06,
"loss": 0.4767,
"step": 15070
},
{
"epoch": 2.46,
"learning_rate": 3.711305518169583e-06,
"loss": 0.4679,
"step": 15080
},
{
"epoch": 2.46,
"learning_rate": 3.70008972633468e-06,
"loss": 0.4646,
"step": 15090
},
{
"epoch": 2.46,
"learning_rate": 3.6888739344997758e-06,
"loss": 0.4713,
"step": 15100
},
{
"epoch": 2.47,
"learning_rate": 3.677658142664872e-06,
"loss": 0.4628,
"step": 15110
},
{
"epoch": 2.47,
"learning_rate": 3.666442350829969e-06,
"loss": 0.4609,
"step": 15120
},
{
"epoch": 2.47,
"learning_rate": 3.6552265589950652e-06,
"loss": 0.4664,
"step": 15130
},
{
"epoch": 2.47,
"learning_rate": 3.6440107671601616e-06,
"loss": 0.4698,
"step": 15140
},
{
"epoch": 2.47,
"learning_rate": 3.6327949753252584e-06,
"loss": 0.4703,
"step": 15150
},
{
"epoch": 2.47,
"learning_rate": 3.6215791834903547e-06,
"loss": 0.4652,
"step": 15160
},
{
"epoch": 2.48,
"learning_rate": 3.610363391655451e-06,
"loss": 0.4676,
"step": 15170
},
{
"epoch": 2.48,
"learning_rate": 3.599147599820548e-06,
"loss": 0.4586,
"step": 15180
},
{
"epoch": 2.48,
"learning_rate": 3.587931807985644e-06,
"loss": 0.4713,
"step": 15190
},
{
"epoch": 2.48,
"learning_rate": 3.57671601615074e-06,
"loss": 0.4608,
"step": 15200
},
{
"epoch": 2.48,
"learning_rate": 3.565500224315837e-06,
"loss": 0.4606,
"step": 15210
},
{
"epoch": 2.48,
"learning_rate": 3.5542844324809333e-06,
"loss": 0.4805,
"step": 15220
},
{
"epoch": 2.49,
"learning_rate": 3.5430686406460297e-06,
"loss": 0.4572,
"step": 15230
},
{
"epoch": 2.49,
"learning_rate": 3.5318528488111264e-06,
"loss": 0.4591,
"step": 15240
},
{
"epoch": 2.49,
"learning_rate": 3.520637056976223e-06,
"loss": 0.4635,
"step": 15250
},
{
"epoch": 2.49,
"learning_rate": 3.5094212651413196e-06,
"loss": 0.4734,
"step": 15260
},
{
"epoch": 2.49,
"learning_rate": 3.498205473306416e-06,
"loss": 0.4624,
"step": 15270
},
{
"epoch": 2.49,
"learning_rate": 3.486989681471512e-06,
"loss": 0.4719,
"step": 15280
},
{
"epoch": 2.5,
"learning_rate": 3.475773889636609e-06,
"loss": 0.4617,
"step": 15290
},
{
"epoch": 2.5,
"learning_rate": 3.464558097801705e-06,
"loss": 0.4742,
"step": 15300
},
{
"epoch": 2.5,
"learning_rate": 3.4533423059668014e-06,
"loss": 0.47,
"step": 15310
},
{
"epoch": 2.5,
"learning_rate": 3.442126514131898e-06,
"loss": 0.4725,
"step": 15320
},
{
"epoch": 2.5,
"learning_rate": 3.4309107222969945e-06,
"loss": 0.4618,
"step": 15330
},
{
"epoch": 2.5,
"learning_rate": 3.419694930462091e-06,
"loss": 0.4651,
"step": 15340
},
{
"epoch": 2.5,
"learning_rate": 3.4084791386271876e-06,
"loss": 0.4662,
"step": 15350
},
{
"epoch": 2.51,
"learning_rate": 3.397263346792284e-06,
"loss": 0.4773,
"step": 15360
},
{
"epoch": 2.51,
"learning_rate": 3.38604755495738e-06,
"loss": 0.457,
"step": 15370
},
{
"epoch": 2.51,
"learning_rate": 3.374831763122477e-06,
"loss": 0.4621,
"step": 15380
},
{
"epoch": 2.51,
"learning_rate": 3.363615971287573e-06,
"loss": 0.472,
"step": 15390
},
{
"epoch": 2.51,
"learning_rate": 3.3524001794526694e-06,
"loss": 0.4784,
"step": 15400
},
{
"epoch": 2.51,
"learning_rate": 3.3411843876177662e-06,
"loss": 0.4743,
"step": 15410
},
{
"epoch": 2.52,
"learning_rate": 3.3299685957828626e-06,
"loss": 0.4677,
"step": 15420
},
{
"epoch": 2.52,
"learning_rate": 3.318752803947959e-06,
"loss": 0.4707,
"step": 15430
},
{
"epoch": 2.52,
"learning_rate": 3.3075370121130557e-06,
"loss": 0.4629,
"step": 15440
},
{
"epoch": 2.52,
"learning_rate": 3.296321220278152e-06,
"loss": 0.4771,
"step": 15450
},
{
"epoch": 2.52,
"learning_rate": 3.285105428443248e-06,
"loss": 0.4681,
"step": 15460
},
{
"epoch": 2.52,
"learning_rate": 3.273889636608345e-06,
"loss": 0.4653,
"step": 15470
},
{
"epoch": 2.53,
"learning_rate": 3.262673844773441e-06,
"loss": 0.459,
"step": 15480
},
{
"epoch": 2.53,
"learning_rate": 3.2514580529385375e-06,
"loss": 0.471,
"step": 15490
},
{
"epoch": 2.53,
"learning_rate": 3.2402422611036343e-06,
"loss": 0.464,
"step": 15500
},
{
"epoch": 2.53,
"learning_rate": 3.2290264692687306e-06,
"loss": 0.4774,
"step": 15510
},
{
"epoch": 2.53,
"learning_rate": 3.217810677433827e-06,
"loss": 0.4736,
"step": 15520
},
{
"epoch": 2.53,
"learning_rate": 3.2065948855989238e-06,
"loss": 0.4702,
"step": 15530
},
{
"epoch": 2.54,
"learning_rate": 3.19537909376402e-06,
"loss": 0.4598,
"step": 15540
},
{
"epoch": 2.54,
"learning_rate": 3.184163301929116e-06,
"loss": 0.4565,
"step": 15550
},
{
"epoch": 2.54,
"learning_rate": 3.1729475100942133e-06,
"loss": 0.4617,
"step": 15560
},
{
"epoch": 2.54,
"learning_rate": 3.161731718259309e-06,
"loss": 0.4555,
"step": 15570
},
{
"epoch": 2.54,
"learning_rate": 3.1505159264244056e-06,
"loss": 0.4594,
"step": 15580
},
{
"epoch": 2.54,
"learning_rate": 3.1393001345895023e-06,
"loss": 0.4625,
"step": 15590
},
{
"epoch": 2.55,
"learning_rate": 3.1280843427545987e-06,
"loss": 0.4703,
"step": 15600
},
{
"epoch": 2.55,
"learning_rate": 3.116868550919695e-06,
"loss": 0.4745,
"step": 15610
},
{
"epoch": 2.55,
"learning_rate": 3.105652759084792e-06,
"loss": 0.477,
"step": 15620
},
{
"epoch": 2.55,
"learning_rate": 3.094436967249888e-06,
"loss": 0.4619,
"step": 15630
},
{
"epoch": 2.55,
"learning_rate": 3.0832211754149845e-06,
"loss": 0.4658,
"step": 15640
},
{
"epoch": 2.55,
"learning_rate": 3.0720053835800813e-06,
"loss": 0.4684,
"step": 15650
},
{
"epoch": 2.56,
"learning_rate": 3.0607895917451773e-06,
"loss": 0.461,
"step": 15660
},
{
"epoch": 2.56,
"learning_rate": 3.0495737999102736e-06,
"loss": 0.4716,
"step": 15670
},
{
"epoch": 2.56,
"learning_rate": 3.0383580080753704e-06,
"loss": 0.4632,
"step": 15680
},
{
"epoch": 2.56,
"learning_rate": 3.0271422162404668e-06,
"loss": 0.4726,
"step": 15690
},
{
"epoch": 2.56,
"learning_rate": 3.015926424405563e-06,
"loss": 0.4733,
"step": 15700
},
{
"epoch": 2.56,
"learning_rate": 3.00471063257066e-06,
"loss": 0.4668,
"step": 15710
},
{
"epoch": 2.57,
"learning_rate": 2.9934948407357563e-06,
"loss": 0.466,
"step": 15720
},
{
"epoch": 2.57,
"learning_rate": 2.9822790489008526e-06,
"loss": 0.4708,
"step": 15730
},
{
"epoch": 2.57,
"learning_rate": 2.9710632570659494e-06,
"loss": 0.4719,
"step": 15740
},
{
"epoch": 2.57,
"learning_rate": 2.9598474652310453e-06,
"loss": 0.4757,
"step": 15750
},
{
"epoch": 2.57,
"learning_rate": 2.9486316733961417e-06,
"loss": 0.4775,
"step": 15760
},
{
"epoch": 2.57,
"learning_rate": 2.9374158815612385e-06,
"loss": 0.4569,
"step": 15770
},
{
"epoch": 2.58,
"learning_rate": 2.926200089726335e-06,
"loss": 0.4758,
"step": 15780
},
{
"epoch": 2.58,
"learning_rate": 2.914984297891431e-06,
"loss": 0.4654,
"step": 15790
},
{
"epoch": 2.58,
"learning_rate": 2.903768506056528e-06,
"loss": 0.4641,
"step": 15800
},
{
"epoch": 2.58,
"learning_rate": 2.8925527142216243e-06,
"loss": 0.4675,
"step": 15810
},
{
"epoch": 2.58,
"learning_rate": 2.8813369223867207e-06,
"loss": 0.4771,
"step": 15820
},
{
"epoch": 2.58,
"learning_rate": 2.8701211305518175e-06,
"loss": 0.4653,
"step": 15830
},
{
"epoch": 2.58,
"learning_rate": 2.8589053387169134e-06,
"loss": 0.4646,
"step": 15840
},
{
"epoch": 2.59,
"learning_rate": 2.8476895468820097e-06,
"loss": 0.4671,
"step": 15850
},
{
"epoch": 2.59,
"learning_rate": 2.8364737550471065e-06,
"loss": 0.4733,
"step": 15860
},
{
"epoch": 2.59,
"learning_rate": 2.825257963212203e-06,
"loss": 0.4629,
"step": 15870
},
{
"epoch": 2.59,
"learning_rate": 2.8140421713772997e-06,
"loss": 0.4803,
"step": 15880
},
{
"epoch": 2.59,
"learning_rate": 2.802826379542396e-06,
"loss": 0.465,
"step": 15890
},
{
"epoch": 2.59,
"learning_rate": 2.7916105877074924e-06,
"loss": 0.466,
"step": 15900
},
{
"epoch": 2.6,
"learning_rate": 2.780394795872589e-06,
"loss": 0.4667,
"step": 15910
},
{
"epoch": 2.6,
"learning_rate": 2.7691790040376855e-06,
"loss": 0.4703,
"step": 15920
},
{
"epoch": 2.6,
"learning_rate": 2.7579632122027814e-06,
"loss": 0.4772,
"step": 15930
},
{
"epoch": 2.6,
"learning_rate": 2.7467474203678787e-06,
"loss": 0.4728,
"step": 15940
},
{
"epoch": 2.6,
"learning_rate": 2.7355316285329746e-06,
"loss": 0.4724,
"step": 15950
},
{
"epoch": 2.6,
"learning_rate": 2.724315836698071e-06,
"loss": 0.4666,
"step": 15960
},
{
"epoch": 2.61,
"learning_rate": 2.7131000448631677e-06,
"loss": 0.4648,
"step": 15970
},
{
"epoch": 2.61,
"learning_rate": 2.701884253028264e-06,
"loss": 0.4647,
"step": 15980
},
{
"epoch": 2.61,
"learning_rate": 2.6906684611933604e-06,
"loss": 0.4679,
"step": 15990
},
{
"epoch": 2.61,
"learning_rate": 2.6794526693584572e-06,
"loss": 0.4604,
"step": 16000
},
{
"epoch": 2.61,
"learning_rate": 2.6682368775235536e-06,
"loss": 0.4632,
"step": 16010
},
{
"epoch": 2.61,
"learning_rate": 2.6570210856886495e-06,
"loss": 0.4593,
"step": 16020
},
{
"epoch": 2.62,
"learning_rate": 2.6458052938537467e-06,
"loss": 0.4646,
"step": 16030
},
{
"epoch": 2.62,
"learning_rate": 2.6345895020188426e-06,
"loss": 0.459,
"step": 16040
},
{
"epoch": 2.62,
"learning_rate": 2.623373710183939e-06,
"loss": 0.4707,
"step": 16050
},
{
"epoch": 2.62,
"learning_rate": 2.6121579183490358e-06,
"loss": 0.462,
"step": 16060
},
{
"epoch": 2.62,
"learning_rate": 2.600942126514132e-06,
"loss": 0.4696,
"step": 16070
},
{
"epoch": 2.62,
"learning_rate": 2.5897263346792285e-06,
"loss": 0.4678,
"step": 16080
},
{
"epoch": 2.63,
"learning_rate": 2.5785105428443253e-06,
"loss": 0.4669,
"step": 16090
},
{
"epoch": 2.63,
"learning_rate": 2.5672947510094216e-06,
"loss": 0.4619,
"step": 16100
},
{
"epoch": 2.63,
"learning_rate": 2.5560789591745176e-06,
"loss": 0.4588,
"step": 16110
},
{
"epoch": 2.63,
"learning_rate": 2.5448631673396148e-06,
"loss": 0.4608,
"step": 16120
},
{
"epoch": 2.63,
"learning_rate": 2.5336473755047107e-06,
"loss": 0.4564,
"step": 16130
},
{
"epoch": 2.63,
"learning_rate": 2.522431583669807e-06,
"loss": 0.4682,
"step": 16140
},
{
"epoch": 2.64,
"learning_rate": 2.511215791834904e-06,
"loss": 0.4623,
"step": 16150
},
{
"epoch": 2.64,
"learning_rate": 2.5e-06,
"loss": 0.4652,
"step": 16160
},
{
"epoch": 2.64,
"learning_rate": 2.4887842081650966e-06,
"loss": 0.4679,
"step": 16170
},
{
"epoch": 2.64,
"learning_rate": 2.477568416330193e-06,
"loss": 0.4719,
"step": 16180
},
{
"epoch": 2.64,
"learning_rate": 2.4663526244952897e-06,
"loss": 0.4835,
"step": 16190
},
{
"epoch": 2.64,
"learning_rate": 2.455136832660386e-06,
"loss": 0.4721,
"step": 16200
},
{
"epoch": 2.65,
"learning_rate": 2.4439210408254824e-06,
"loss": 0.4616,
"step": 16210
},
{
"epoch": 2.65,
"learning_rate": 2.4327052489905788e-06,
"loss": 0.4592,
"step": 16220
},
{
"epoch": 2.65,
"learning_rate": 2.4214894571556756e-06,
"loss": 0.4607,
"step": 16230
},
{
"epoch": 2.65,
"learning_rate": 2.410273665320772e-06,
"loss": 0.4716,
"step": 16240
},
{
"epoch": 2.65,
"learning_rate": 2.3990578734858683e-06,
"loss": 0.4689,
"step": 16250
},
{
"epoch": 2.65,
"learning_rate": 2.3878420816509646e-06,
"loss": 0.4712,
"step": 16260
},
{
"epoch": 2.66,
"learning_rate": 2.376626289816061e-06,
"loss": 0.4771,
"step": 16270
},
{
"epoch": 2.66,
"learning_rate": 2.3654104979811578e-06,
"loss": 0.4687,
"step": 16280
},
{
"epoch": 2.66,
"learning_rate": 2.354194706146254e-06,
"loss": 0.4683,
"step": 16290
},
{
"epoch": 2.66,
"learning_rate": 2.3429789143113505e-06,
"loss": 0.4672,
"step": 16300
},
{
"epoch": 2.66,
"learning_rate": 2.331763122476447e-06,
"loss": 0.4772,
"step": 16310
},
{
"epoch": 2.66,
"learning_rate": 2.3205473306415436e-06,
"loss": 0.4696,
"step": 16320
},
{
"epoch": 2.66,
"learning_rate": 2.30933153880664e-06,
"loss": 0.4691,
"step": 16330
},
{
"epoch": 2.67,
"learning_rate": 2.2981157469717363e-06,
"loss": 0.4646,
"step": 16340
},
{
"epoch": 2.67,
"learning_rate": 2.2868999551368327e-06,
"loss": 0.4736,
"step": 16350
},
{
"epoch": 2.67,
"learning_rate": 2.2756841633019295e-06,
"loss": 0.4831,
"step": 16360
},
{
"epoch": 2.67,
"learning_rate": 2.264468371467026e-06,
"loss": 0.4555,
"step": 16370
},
{
"epoch": 2.67,
"learning_rate": 2.253252579632122e-06,
"loss": 0.4751,
"step": 16380
},
{
"epoch": 2.67,
"learning_rate": 2.242036787797219e-06,
"loss": 0.472,
"step": 16390
},
{
"epoch": 2.68,
"learning_rate": 2.230820995962315e-06,
"loss": 0.4743,
"step": 16400
},
{
"epoch": 2.68,
"learning_rate": 2.2196052041274117e-06,
"loss": 0.4657,
"step": 16410
},
{
"epoch": 2.68,
"learning_rate": 2.208389412292508e-06,
"loss": 0.459,
"step": 16420
},
{
"epoch": 2.68,
"learning_rate": 2.1971736204576044e-06,
"loss": 0.4586,
"step": 16430
},
{
"epoch": 2.68,
"learning_rate": 2.1859578286227007e-06,
"loss": 0.4627,
"step": 16440
},
{
"epoch": 2.68,
"learning_rate": 2.1747420367877975e-06,
"loss": 0.4665,
"step": 16450
},
{
"epoch": 2.69,
"learning_rate": 2.163526244952894e-06,
"loss": 0.4625,
"step": 16460
},
{
"epoch": 2.69,
"learning_rate": 2.1523104531179902e-06,
"loss": 0.4576,
"step": 16470
},
{
"epoch": 2.69,
"learning_rate": 2.141094661283087e-06,
"loss": 0.465,
"step": 16480
},
{
"epoch": 2.69,
"learning_rate": 2.129878869448183e-06,
"loss": 0.4622,
"step": 16490
},
{
"epoch": 2.69,
"learning_rate": 2.1186630776132797e-06,
"loss": 0.4624,
"step": 16500
},
{
"epoch": 2.69,
"learning_rate": 2.107447285778376e-06,
"loss": 0.4644,
"step": 16510
},
{
"epoch": 2.7,
"learning_rate": 2.0962314939434725e-06,
"loss": 0.472,
"step": 16520
},
{
"epoch": 2.7,
"learning_rate": 2.085015702108569e-06,
"loss": 0.4694,
"step": 16530
},
{
"epoch": 2.7,
"learning_rate": 2.0737999102736656e-06,
"loss": 0.4766,
"step": 16540
},
{
"epoch": 2.7,
"learning_rate": 2.062584118438762e-06,
"loss": 0.467,
"step": 16550
},
{
"epoch": 2.7,
"learning_rate": 2.0513683266038583e-06,
"loss": 0.4701,
"step": 16560
},
{
"epoch": 2.7,
"learning_rate": 2.040152534768955e-06,
"loss": 0.4611,
"step": 16570
},
{
"epoch": 2.71,
"learning_rate": 2.028936742934051e-06,
"loss": 0.4631,
"step": 16580
},
{
"epoch": 2.71,
"learning_rate": 2.017720951099148e-06,
"loss": 0.4672,
"step": 16590
},
{
"epoch": 2.71,
"learning_rate": 2.006505159264244e-06,
"loss": 0.4649,
"step": 16600
},
{
"epoch": 2.71,
"learning_rate": 1.9952893674293405e-06,
"loss": 0.4703,
"step": 16610
},
{
"epoch": 2.71,
"learning_rate": 1.9840735755944373e-06,
"loss": 0.4685,
"step": 16620
},
{
"epoch": 2.71,
"learning_rate": 1.9728577837595337e-06,
"loss": 0.4678,
"step": 16630
},
{
"epoch": 2.72,
"learning_rate": 1.96164199192463e-06,
"loss": 0.4651,
"step": 16640
},
{
"epoch": 2.72,
"learning_rate": 1.9504262000897264e-06,
"loss": 0.4585,
"step": 16650
},
{
"epoch": 2.72,
"learning_rate": 1.939210408254823e-06,
"loss": 0.4764,
"step": 16660
},
{
"epoch": 2.72,
"learning_rate": 1.9279946164199195e-06,
"loss": 0.4656,
"step": 16670
},
{
"epoch": 2.72,
"learning_rate": 1.916778824585016e-06,
"loss": 0.4627,
"step": 16680
},
{
"epoch": 2.72,
"learning_rate": 1.9055630327501124e-06,
"loss": 0.4646,
"step": 16690
},
{
"epoch": 2.73,
"learning_rate": 1.8943472409152088e-06,
"loss": 0.4678,
"step": 16700
},
{
"epoch": 2.73,
"learning_rate": 1.8831314490803051e-06,
"loss": 0.4778,
"step": 16710
},
{
"epoch": 2.73,
"learning_rate": 1.8719156572454017e-06,
"loss": 0.4632,
"step": 16720
},
{
"epoch": 2.73,
"learning_rate": 1.8606998654104983e-06,
"loss": 0.474,
"step": 16730
},
{
"epoch": 2.73,
"learning_rate": 1.8494840735755944e-06,
"loss": 0.4667,
"step": 16740
},
{
"epoch": 2.73,
"learning_rate": 1.838268281740691e-06,
"loss": 0.4698,
"step": 16750
},
{
"epoch": 2.73,
"learning_rate": 1.8270524899057876e-06,
"loss": 0.4575,
"step": 16760
},
{
"epoch": 2.74,
"learning_rate": 1.815836698070884e-06,
"loss": 0.4755,
"step": 16770
},
{
"epoch": 2.74,
"learning_rate": 1.8046209062359805e-06,
"loss": 0.4809,
"step": 16780
},
{
"epoch": 2.74,
"learning_rate": 1.7934051144010769e-06,
"loss": 0.4655,
"step": 16790
},
{
"epoch": 2.74,
"learning_rate": 1.7821893225661732e-06,
"loss": 0.4685,
"step": 16800
},
{
"epoch": 2.74,
"learning_rate": 1.7709735307312698e-06,
"loss": 0.4659,
"step": 16810
},
{
"epoch": 2.74,
"learning_rate": 1.7597577388963663e-06,
"loss": 0.4676,
"step": 16820
},
{
"epoch": 2.75,
"learning_rate": 1.7485419470614625e-06,
"loss": 0.4582,
"step": 16830
},
{
"epoch": 2.75,
"learning_rate": 1.737326155226559e-06,
"loss": 0.4692,
"step": 16840
},
{
"epoch": 2.75,
"learning_rate": 1.7261103633916556e-06,
"loss": 0.4674,
"step": 16850
},
{
"epoch": 2.75,
"learning_rate": 1.714894571556752e-06,
"loss": 0.4644,
"step": 16860
},
{
"epoch": 2.75,
"learning_rate": 1.7036787797218486e-06,
"loss": 0.4727,
"step": 16870
},
{
"epoch": 2.75,
"learning_rate": 1.6924629878869451e-06,
"loss": 0.4748,
"step": 16880
},
{
"epoch": 2.76,
"learning_rate": 1.6812471960520413e-06,
"loss": 0.4653,
"step": 16890
},
{
"epoch": 2.76,
"learning_rate": 1.6700314042171378e-06,
"loss": 0.4676,
"step": 16900
},
{
"epoch": 2.76,
"learning_rate": 1.6588156123822344e-06,
"loss": 0.4725,
"step": 16910
},
{
"epoch": 2.76,
"learning_rate": 1.6475998205473306e-06,
"loss": 0.4715,
"step": 16920
},
{
"epoch": 2.76,
"learning_rate": 1.6363840287124271e-06,
"loss": 0.4602,
"step": 16930
},
{
"epoch": 2.76,
"learning_rate": 1.6251682368775237e-06,
"loss": 0.4657,
"step": 16940
},
{
"epoch": 2.77,
"learning_rate": 1.6139524450426203e-06,
"loss": 0.4735,
"step": 16950
},
{
"epoch": 2.77,
"learning_rate": 1.6027366532077166e-06,
"loss": 0.4625,
"step": 16960
},
{
"epoch": 2.77,
"learning_rate": 1.5915208613728132e-06,
"loss": 0.4575,
"step": 16970
},
{
"epoch": 2.77,
"learning_rate": 1.5803050695379095e-06,
"loss": 0.4619,
"step": 16980
},
{
"epoch": 2.77,
"learning_rate": 1.569089277703006e-06,
"loss": 0.4673,
"step": 16990
},
{
"epoch": 2.77,
"learning_rate": 1.5578734858681025e-06,
"loss": 0.4607,
"step": 17000
},
{
"epoch": 2.78,
"learning_rate": 1.546657694033199e-06,
"loss": 0.4702,
"step": 17010
},
{
"epoch": 2.78,
"learning_rate": 1.5354419021982952e-06,
"loss": 0.4622,
"step": 17020
},
{
"epoch": 2.78,
"learning_rate": 1.5242261103633918e-06,
"loss": 0.46,
"step": 17030
},
{
"epoch": 2.78,
"learning_rate": 1.5130103185284883e-06,
"loss": 0.4759,
"step": 17040
},
{
"epoch": 2.78,
"learning_rate": 1.5017945266935847e-06,
"loss": 0.4663,
"step": 17050
},
{
"epoch": 2.78,
"learning_rate": 1.4905787348586812e-06,
"loss": 0.4773,
"step": 17060
},
{
"epoch": 2.79,
"learning_rate": 1.4793629430237776e-06,
"loss": 0.4588,
"step": 17070
},
{
"epoch": 2.79,
"learning_rate": 1.468147151188874e-06,
"loss": 0.4705,
"step": 17080
},
{
"epoch": 2.79,
"learning_rate": 1.4569313593539705e-06,
"loss": 0.466,
"step": 17090
},
{
"epoch": 2.79,
"learning_rate": 1.445715567519067e-06,
"loss": 0.4704,
"step": 17100
},
{
"epoch": 2.79,
"learning_rate": 1.4344997756841632e-06,
"loss": 0.4631,
"step": 17110
},
{
"epoch": 2.79,
"learning_rate": 1.4232839838492598e-06,
"loss": 0.4658,
"step": 17120
},
{
"epoch": 2.8,
"learning_rate": 1.4120681920143564e-06,
"loss": 0.4629,
"step": 17130
},
{
"epoch": 2.8,
"learning_rate": 1.4008524001794527e-06,
"loss": 0.4614,
"step": 17140
},
{
"epoch": 2.8,
"learning_rate": 1.3896366083445493e-06,
"loss": 0.4742,
"step": 17150
},
{
"epoch": 2.8,
"learning_rate": 1.3784208165096457e-06,
"loss": 0.4547,
"step": 17160
},
{
"epoch": 2.8,
"learning_rate": 1.367205024674742e-06,
"loss": 0.4481,
"step": 17170
},
{
"epoch": 2.8,
"learning_rate": 1.3559892328398386e-06,
"loss": 0.4628,
"step": 17180
},
{
"epoch": 2.81,
"learning_rate": 1.3447734410049352e-06,
"loss": 0.47,
"step": 17190
},
{
"epoch": 2.81,
"learning_rate": 1.3335576491700313e-06,
"loss": 0.4543,
"step": 17200
},
{
"epoch": 2.81,
"learning_rate": 1.3223418573351279e-06,
"loss": 0.4646,
"step": 17210
},
{
"epoch": 2.81,
"learning_rate": 1.3111260655002244e-06,
"loss": 0.4705,
"step": 17220
},
{
"epoch": 2.81,
"learning_rate": 1.2999102736653208e-06,
"loss": 0.4649,
"step": 17230
},
{
"epoch": 2.81,
"learning_rate": 1.2886944818304174e-06,
"loss": 0.4637,
"step": 17240
},
{
"epoch": 2.81,
"learning_rate": 1.2774786899955137e-06,
"loss": 0.4602,
"step": 17250
},
{
"epoch": 2.82,
"learning_rate": 1.2662628981606103e-06,
"loss": 0.4743,
"step": 17260
},
{
"epoch": 2.82,
"learning_rate": 1.2550471063257067e-06,
"loss": 0.4649,
"step": 17270
},
{
"epoch": 2.82,
"learning_rate": 1.2438313144908032e-06,
"loss": 0.4541,
"step": 17280
},
{
"epoch": 2.82,
"learning_rate": 1.2326155226558996e-06,
"loss": 0.4619,
"step": 17290
},
{
"epoch": 2.82,
"learning_rate": 1.221399730820996e-06,
"loss": 0.4783,
"step": 17300
},
{
"epoch": 2.82,
"learning_rate": 1.2101839389860925e-06,
"loss": 0.451,
"step": 17310
},
{
"epoch": 2.83,
"learning_rate": 1.1989681471511889e-06,
"loss": 0.4639,
"step": 17320
},
{
"epoch": 2.83,
"learning_rate": 1.1877523553162854e-06,
"loss": 0.4599,
"step": 17330
},
{
"epoch": 2.83,
"learning_rate": 1.176536563481382e-06,
"loss": 0.4702,
"step": 17340
},
{
"epoch": 2.83,
"learning_rate": 1.1653207716464784e-06,
"loss": 0.4689,
"step": 17350
},
{
"epoch": 2.83,
"learning_rate": 1.154104979811575e-06,
"loss": 0.4683,
"step": 17360
},
{
"epoch": 2.83,
"learning_rate": 1.1428891879766713e-06,
"loss": 0.4672,
"step": 17370
},
{
"epoch": 2.84,
"learning_rate": 1.1316733961417676e-06,
"loss": 0.4728,
"step": 17380
},
{
"epoch": 2.84,
"learning_rate": 1.1204576043068642e-06,
"loss": 0.4736,
"step": 17390
},
{
"epoch": 2.84,
"learning_rate": 1.1092418124719606e-06,
"loss": 0.461,
"step": 17400
},
{
"epoch": 2.84,
"learning_rate": 1.098026020637057e-06,
"loss": 0.4564,
"step": 17410
},
{
"epoch": 2.84,
"learning_rate": 1.0868102288021535e-06,
"loss": 0.4573,
"step": 17420
},
{
"epoch": 2.84,
"learning_rate": 1.07559443696725e-06,
"loss": 0.4646,
"step": 17430
},
{
"epoch": 2.85,
"learning_rate": 1.0643786451323464e-06,
"loss": 0.4579,
"step": 17440
},
{
"epoch": 2.85,
"learning_rate": 1.053162853297443e-06,
"loss": 0.4557,
"step": 17450
},
{
"epoch": 2.85,
"learning_rate": 1.0419470614625394e-06,
"loss": 0.4654,
"step": 17460
},
{
"epoch": 2.85,
"learning_rate": 1.0307312696276357e-06,
"loss": 0.4673,
"step": 17470
},
{
"epoch": 2.85,
"learning_rate": 1.0195154777927323e-06,
"loss": 0.4602,
"step": 17480
},
{
"epoch": 2.85,
"learning_rate": 1.0082996859578286e-06,
"loss": 0.4536,
"step": 17490
},
{
"epoch": 2.86,
"learning_rate": 9.970838941229252e-07,
"loss": 0.4669,
"step": 17500
},
{
"epoch": 2.86,
"learning_rate": 9.858681022880216e-07,
"loss": 0.4633,
"step": 17510
},
{
"epoch": 2.86,
"learning_rate": 9.746523104531181e-07,
"loss": 0.468,
"step": 17520
},
{
"epoch": 2.86,
"learning_rate": 9.634365186182145e-07,
"loss": 0.4733,
"step": 17530
},
{
"epoch": 2.86,
"learning_rate": 9.52220726783311e-07,
"loss": 0.476,
"step": 17540
},
{
"epoch": 2.86,
"learning_rate": 9.410049349484074e-07,
"loss": 0.4666,
"step": 17550
},
{
"epoch": 2.87,
"learning_rate": 9.29789143113504e-07,
"loss": 0.4684,
"step": 17560
},
{
"epoch": 2.87,
"learning_rate": 9.185733512786003e-07,
"loss": 0.4673,
"step": 17570
},
{
"epoch": 2.87,
"learning_rate": 9.073575594436967e-07,
"loss": 0.4658,
"step": 17580
},
{
"epoch": 2.87,
"learning_rate": 8.961417676087933e-07,
"loss": 0.4626,
"step": 17590
},
{
"epoch": 2.87,
"learning_rate": 8.849259757738897e-07,
"loss": 0.4619,
"step": 17600
},
{
"epoch": 2.87,
"learning_rate": 8.737101839389861e-07,
"loss": 0.4563,
"step": 17610
},
{
"epoch": 2.88,
"learning_rate": 8.624943921040827e-07,
"loss": 0.4705,
"step": 17620
},
{
"epoch": 2.88,
"learning_rate": 8.51278600269179e-07,
"loss": 0.4573,
"step": 17630
},
{
"epoch": 2.88,
"learning_rate": 8.400628084342756e-07,
"loss": 0.4791,
"step": 17640
},
{
"epoch": 2.88,
"learning_rate": 8.28847016599372e-07,
"loss": 0.458,
"step": 17650
},
{
"epoch": 2.88,
"learning_rate": 8.176312247644684e-07,
"loss": 0.4609,
"step": 17660
},
{
"epoch": 2.88,
"learning_rate": 8.06415432929565e-07,
"loss": 0.4632,
"step": 17670
},
{
"epoch": 2.89,
"learning_rate": 7.951996410946613e-07,
"loss": 0.458,
"step": 17680
},
{
"epoch": 2.89,
"learning_rate": 7.839838492597578e-07,
"loss": 0.4806,
"step": 17690
},
{
"epoch": 2.89,
"learning_rate": 7.727680574248544e-07,
"loss": 0.467,
"step": 17700
},
{
"epoch": 2.89,
"learning_rate": 7.615522655899507e-07,
"loss": 0.4666,
"step": 17710
},
{
"epoch": 2.89,
"learning_rate": 7.503364737550471e-07,
"loss": 0.4675,
"step": 17720
},
{
"epoch": 2.89,
"learning_rate": 7.391206819201436e-07,
"loss": 0.4607,
"step": 17730
},
{
"epoch": 2.89,
"learning_rate": 7.279048900852401e-07,
"loss": 0.4656,
"step": 17740
},
{
"epoch": 2.9,
"learning_rate": 7.166890982503365e-07,
"loss": 0.462,
"step": 17750
},
{
"epoch": 2.9,
"learning_rate": 7.05473306415433e-07,
"loss": 0.4694,
"step": 17760
},
{
"epoch": 2.9,
"learning_rate": 6.942575145805294e-07,
"loss": 0.4644,
"step": 17770
},
{
"epoch": 2.9,
"learning_rate": 6.830417227456259e-07,
"loss": 0.4688,
"step": 17780
},
{
"epoch": 2.9,
"learning_rate": 6.718259309107224e-07,
"loss": 0.4758,
"step": 17790
},
{
"epoch": 2.9,
"learning_rate": 6.606101390758188e-07,
"loss": 0.4581,
"step": 17800
},
{
"epoch": 2.91,
"learning_rate": 6.493943472409153e-07,
"loss": 0.4693,
"step": 17810
},
{
"epoch": 2.91,
"learning_rate": 6.381785554060117e-07,
"loss": 0.4669,
"step": 17820
},
{
"epoch": 2.91,
"learning_rate": 6.269627635711082e-07,
"loss": 0.4576,
"step": 17830
},
{
"epoch": 2.91,
"learning_rate": 6.157469717362046e-07,
"loss": 0.4635,
"step": 17840
},
{
"epoch": 2.91,
"learning_rate": 6.045311799013011e-07,
"loss": 0.4788,
"step": 17850
},
{
"epoch": 2.91,
"learning_rate": 5.933153880663976e-07,
"loss": 0.4627,
"step": 17860
},
{
"epoch": 2.92,
"learning_rate": 5.820995962314939e-07,
"loss": 0.4625,
"step": 17870
},
{
"epoch": 2.92,
"learning_rate": 5.708838043965905e-07,
"loss": 0.4626,
"step": 17880
},
{
"epoch": 2.92,
"learning_rate": 5.596680125616869e-07,
"loss": 0.4663,
"step": 17890
},
{
"epoch": 2.92,
"learning_rate": 5.484522207267833e-07,
"loss": 0.4782,
"step": 17900
},
{
"epoch": 2.92,
"learning_rate": 5.372364288918798e-07,
"loss": 0.4608,
"step": 17910
},
{
"epoch": 2.92,
"learning_rate": 5.260206370569762e-07,
"loss": 0.4672,
"step": 17920
},
{
"epoch": 2.93,
"learning_rate": 5.148048452220728e-07,
"loss": 0.4614,
"step": 17930
},
{
"epoch": 2.93,
"learning_rate": 5.035890533871692e-07,
"loss": 0.4591,
"step": 17940
},
{
"epoch": 2.93,
"learning_rate": 4.923732615522656e-07,
"loss": 0.4624,
"step": 17950
},
{
"epoch": 2.93,
"learning_rate": 4.811574697173621e-07,
"loss": 0.4589,
"step": 17960
},
{
"epoch": 2.93,
"learning_rate": 4.699416778824585e-07,
"loss": 0.4598,
"step": 17970
},
{
"epoch": 2.93,
"learning_rate": 4.58725886047555e-07,
"loss": 0.4641,
"step": 17980
},
{
"epoch": 2.94,
"learning_rate": 4.4751009421265147e-07,
"loss": 0.4718,
"step": 17990
},
{
"epoch": 2.94,
"learning_rate": 4.3629430237774793e-07,
"loss": 0.4635,
"step": 18000
},
{
"epoch": 2.94,
"learning_rate": 4.2507851054284434e-07,
"loss": 0.4641,
"step": 18010
},
{
"epoch": 2.94,
"learning_rate": 4.138627187079408e-07,
"loss": 0.48,
"step": 18020
},
{
"epoch": 2.94,
"learning_rate": 4.0264692687303727e-07,
"loss": 0.4724,
"step": 18030
},
{
"epoch": 2.94,
"learning_rate": 3.914311350381337e-07,
"loss": 0.4706,
"step": 18040
},
{
"epoch": 2.95,
"learning_rate": 3.8021534320323014e-07,
"loss": 0.4717,
"step": 18050
},
{
"epoch": 2.95,
"learning_rate": 3.6899955136832666e-07,
"loss": 0.4662,
"step": 18060
},
{
"epoch": 2.95,
"learning_rate": 3.577837595334231e-07,
"loss": 0.4572,
"step": 18070
},
{
"epoch": 2.95,
"learning_rate": 3.4656796769851953e-07,
"loss": 0.466,
"step": 18080
},
{
"epoch": 2.95,
"learning_rate": 3.35352175863616e-07,
"loss": 0.47,
"step": 18090
},
{
"epoch": 2.95,
"learning_rate": 3.2413638402871246e-07,
"loss": 0.4666,
"step": 18100
},
{
"epoch": 2.96,
"learning_rate": 3.1292059219380887e-07,
"loss": 0.4632,
"step": 18110
},
{
"epoch": 2.96,
"learning_rate": 3.0170480035890533e-07,
"loss": 0.4671,
"step": 18120
},
{
"epoch": 2.96,
"learning_rate": 2.9048900852400185e-07,
"loss": 0.4745,
"step": 18130
},
{
"epoch": 2.96,
"learning_rate": 2.7927321668909826e-07,
"loss": 0.4666,
"step": 18140
},
{
"epoch": 2.96,
"learning_rate": 2.680574248541947e-07,
"loss": 0.4636,
"step": 18150
},
{
"epoch": 2.96,
"learning_rate": 2.568416330192912e-07,
"loss": 0.4618,
"step": 18160
},
{
"epoch": 2.97,
"learning_rate": 2.4562584118438765e-07,
"loss": 0.4618,
"step": 18170
},
{
"epoch": 2.97,
"learning_rate": 2.344100493494841e-07,
"loss": 0.479,
"step": 18180
},
{
"epoch": 2.97,
"learning_rate": 2.2319425751458055e-07,
"loss": 0.4626,
"step": 18190
},
{
"epoch": 2.97,
"learning_rate": 2.11978465679677e-07,
"loss": 0.4701,
"step": 18200
},
{
"epoch": 2.97,
"learning_rate": 2.0076267384477345e-07,
"loss": 0.4585,
"step": 18210
},
{
"epoch": 2.97,
"learning_rate": 1.895468820098699e-07,
"loss": 0.4665,
"step": 18220
},
{
"epoch": 2.97,
"learning_rate": 1.7833109017496637e-07,
"loss": 0.4693,
"step": 18230
},
{
"epoch": 2.98,
"learning_rate": 1.671152983400628e-07,
"loss": 0.4653,
"step": 18240
},
{
"epoch": 2.98,
"learning_rate": 1.5589950650515927e-07,
"loss": 0.4653,
"step": 18250
},
{
"epoch": 2.98,
"learning_rate": 1.4468371467025574e-07,
"loss": 0.4652,
"step": 18260
},
{
"epoch": 2.98,
"learning_rate": 1.3346792283535217e-07,
"loss": 0.4618,
"step": 18270
},
{
"epoch": 2.98,
"learning_rate": 1.2225213100044864e-07,
"loss": 0.4677,
"step": 18280
},
{
"epoch": 2.98,
"learning_rate": 1.110363391655451e-07,
"loss": 0.4629,
"step": 18290
},
{
"epoch": 2.99,
"learning_rate": 9.982054733064156e-08,
"loss": 0.4664,
"step": 18300
},
{
"epoch": 2.99,
"learning_rate": 8.8604755495738e-08,
"loss": 0.4599,
"step": 18310
},
{
"epoch": 2.99,
"learning_rate": 7.738896366083446e-08,
"loss": 0.4586,
"step": 18320
},
{
"epoch": 2.99,
"learning_rate": 6.617317182593091e-08,
"loss": 0.4506,
"step": 18330
},
{
"epoch": 2.99,
"learning_rate": 5.4957379991027376e-08,
"loss": 0.467,
"step": 18340
},
{
"epoch": 2.99,
"learning_rate": 4.374158815612382e-08,
"loss": 0.4655,
"step": 18350
},
{
"epoch": 3.0,
"learning_rate": 3.252579632122028e-08,
"loss": 0.4675,
"step": 18360
},
{
"epoch": 3.0,
"learning_rate": 2.131000448631674e-08,
"loss": 0.4673,
"step": 18370
},
{
"epoch": 3.0,
"learning_rate": 1.009421265141319e-08,
"loss": 0.4584,
"step": 18380
}
],
"logging_steps": 10,
"max_steps": 18384,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 1.2862587547397652e+19,
"trial_name": null,
"trial_params": null
}