bloom-1b1-focus-sw / trainer_state.json
atsuki-yamaguchi's picture
Upload folder using huggingface_hub
4bfb5f2 verified
raw
history blame
218 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9353078721745908,
"eval_steps": 500,
"global_step": 9000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5e-06,
"loss": 6.4613,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 1e-05,
"loss": 6.3449,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 1.5e-05,
"loss": 6.2089,
"step": 15
},
{
"epoch": 0.0,
"learning_rate": 2e-05,
"loss": 6.0439,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 2.5e-05,
"loss": 5.7149,
"step": 25
},
{
"epoch": 0.0,
"learning_rate": 3e-05,
"loss": 5.5198,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 3.5e-05,
"loss": 5.2716,
"step": 35
},
{
"epoch": 0.0,
"learning_rate": 4e-05,
"loss": 5.206,
"step": 40
},
{
"epoch": 0.0,
"learning_rate": 4.5e-05,
"loss": 5.0601,
"step": 45
},
{
"epoch": 0.01,
"learning_rate": 5e-05,
"loss": 4.97,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 5.500000000000001e-05,
"loss": 4.9399,
"step": 55
},
{
"epoch": 0.01,
"learning_rate": 6e-05,
"loss": 4.7808,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 6.500000000000001e-05,
"loss": 4.7396,
"step": 65
},
{
"epoch": 0.01,
"learning_rate": 7e-05,
"loss": 4.6598,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 7.500000000000001e-05,
"loss": 4.5708,
"step": 75
},
{
"epoch": 0.01,
"learning_rate": 8e-05,
"loss": 4.5469,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 8.5e-05,
"loss": 4.5547,
"step": 85
},
{
"epoch": 0.01,
"learning_rate": 9e-05,
"loss": 4.3816,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 9.5e-05,
"loss": 4.421,
"step": 95
},
{
"epoch": 0.01,
"learning_rate": 0.0001,
"loss": 4.1978,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 9.99999319664223e-05,
"loss": 4.384,
"step": 105
},
{
"epoch": 0.01,
"learning_rate": 9.999972786587433e-05,
"loss": 4.3236,
"step": 110
},
{
"epoch": 0.01,
"learning_rate": 9.99993876989115e-05,
"loss": 4.1347,
"step": 115
},
{
"epoch": 0.01,
"learning_rate": 9.999891146645955e-05,
"loss": 4.1318,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 9.999829916981445e-05,
"loss": 4.1232,
"step": 125
},
{
"epoch": 0.01,
"learning_rate": 9.999755081064248e-05,
"loss": 4.0309,
"step": 130
},
{
"epoch": 0.01,
"learning_rate": 9.999666639098018e-05,
"loss": 4.0604,
"step": 135
},
{
"epoch": 0.01,
"learning_rate": 9.999564591323436e-05,
"loss": 4.0176,
"step": 140
},
{
"epoch": 0.02,
"learning_rate": 9.99944893801821e-05,
"loss": 3.9952,
"step": 145
},
{
"epoch": 0.02,
"learning_rate": 9.99931967949707e-05,
"loss": 3.9909,
"step": 150
},
{
"epoch": 0.02,
"learning_rate": 9.999176816111775e-05,
"loss": 3.9839,
"step": 155
},
{
"epoch": 0.02,
"learning_rate": 9.999020348251104e-05,
"loss": 3.9899,
"step": 160
},
{
"epoch": 0.02,
"learning_rate": 9.99885027634086e-05,
"loss": 3.9446,
"step": 165
},
{
"epoch": 0.02,
"learning_rate": 9.998666600843867e-05,
"loss": 3.9276,
"step": 170
},
{
"epoch": 0.02,
"learning_rate": 9.99846932225997e-05,
"loss": 3.8704,
"step": 175
},
{
"epoch": 0.02,
"learning_rate": 9.99825844112603e-05,
"loss": 3.7708,
"step": 180
},
{
"epoch": 0.02,
"learning_rate": 9.998033958015928e-05,
"loss": 3.7353,
"step": 185
},
{
"epoch": 0.02,
"learning_rate": 9.997795873540561e-05,
"loss": 3.8714,
"step": 190
},
{
"epoch": 0.02,
"learning_rate": 9.997544188347834e-05,
"loss": 3.7936,
"step": 195
},
{
"epoch": 0.02,
"learning_rate": 9.997278903122673e-05,
"loss": 3.8124,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 9.997000018587011e-05,
"loss": 3.7354,
"step": 205
},
{
"epoch": 0.02,
"learning_rate": 9.996707535499784e-05,
"loss": 3.839,
"step": 210
},
{
"epoch": 0.02,
"learning_rate": 9.99640145465694e-05,
"loss": 3.6957,
"step": 215
},
{
"epoch": 0.02,
"learning_rate": 9.996081776891434e-05,
"loss": 3.7322,
"step": 220
},
{
"epoch": 0.02,
"learning_rate": 9.995748503073215e-05,
"loss": 3.6692,
"step": 225
},
{
"epoch": 0.02,
"learning_rate": 9.995401634109236e-05,
"loss": 3.7143,
"step": 230
},
{
"epoch": 0.02,
"learning_rate": 9.995041170943447e-05,
"loss": 3.7283,
"step": 235
},
{
"epoch": 0.02,
"learning_rate": 9.994667114556792e-05,
"loss": 3.6956,
"step": 240
},
{
"epoch": 0.03,
"learning_rate": 9.994279465967206e-05,
"loss": 3.5947,
"step": 245
},
{
"epoch": 0.03,
"learning_rate": 9.993878226229616e-05,
"loss": 3.6755,
"step": 250
},
{
"epoch": 0.03,
"learning_rate": 9.993463396435929e-05,
"loss": 3.6632,
"step": 255
},
{
"epoch": 0.03,
"learning_rate": 9.993034977715044e-05,
"loss": 3.577,
"step": 260
},
{
"epoch": 0.03,
"learning_rate": 9.992592971232833e-05,
"loss": 3.6296,
"step": 265
},
{
"epoch": 0.03,
"learning_rate": 9.992137378192145e-05,
"loss": 3.7335,
"step": 270
},
{
"epoch": 0.03,
"learning_rate": 9.991668199832809e-05,
"loss": 3.5873,
"step": 275
},
{
"epoch": 0.03,
"learning_rate": 9.991185437431618e-05,
"loss": 3.5381,
"step": 280
},
{
"epoch": 0.03,
"learning_rate": 9.990689092302336e-05,
"loss": 3.5779,
"step": 285
},
{
"epoch": 0.03,
"learning_rate": 9.990179165795686e-05,
"loss": 3.5466,
"step": 290
},
{
"epoch": 0.03,
"learning_rate": 9.989655659299353e-05,
"loss": 3.6173,
"step": 295
},
{
"epoch": 0.03,
"learning_rate": 9.98911857423798e-05,
"loss": 3.5564,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 9.988567912073158e-05,
"loss": 3.5381,
"step": 305
},
{
"epoch": 0.03,
"learning_rate": 9.98800367430343e-05,
"loss": 3.6047,
"step": 310
},
{
"epoch": 0.03,
"learning_rate": 9.987425862464277e-05,
"loss": 3.539,
"step": 315
},
{
"epoch": 0.03,
"learning_rate": 9.986834478128127e-05,
"loss": 3.7013,
"step": 320
},
{
"epoch": 0.03,
"learning_rate": 9.986229522904336e-05,
"loss": 3.5079,
"step": 325
},
{
"epoch": 0.03,
"learning_rate": 9.985610998439197e-05,
"loss": 3.5098,
"step": 330
},
{
"epoch": 0.03,
"learning_rate": 9.984978906415927e-05,
"loss": 3.5938,
"step": 335
},
{
"epoch": 0.04,
"learning_rate": 9.984333248554665e-05,
"loss": 3.5508,
"step": 340
},
{
"epoch": 0.04,
"learning_rate": 9.983674026612467e-05,
"loss": 3.5463,
"step": 345
},
{
"epoch": 0.04,
"learning_rate": 9.983001242383302e-05,
"loss": 3.5326,
"step": 350
},
{
"epoch": 0.04,
"learning_rate": 9.98231489769805e-05,
"loss": 3.5422,
"step": 355
},
{
"epoch": 0.04,
"learning_rate": 9.981614994424487e-05,
"loss": 3.5124,
"step": 360
},
{
"epoch": 0.04,
"learning_rate": 9.980901534467291e-05,
"loss": 3.4286,
"step": 365
},
{
"epoch": 0.04,
"learning_rate": 9.980174519768031e-05,
"loss": 3.559,
"step": 370
},
{
"epoch": 0.04,
"learning_rate": 9.979433952305163e-05,
"loss": 3.4403,
"step": 375
},
{
"epoch": 0.04,
"learning_rate": 9.978679834094025e-05,
"loss": 3.4253,
"step": 380
},
{
"epoch": 0.04,
"learning_rate": 9.977912167186834e-05,
"loss": 3.4372,
"step": 385
},
{
"epoch": 0.04,
"learning_rate": 9.977130953672673e-05,
"loss": 3.4207,
"step": 390
},
{
"epoch": 0.04,
"learning_rate": 9.976336195677491e-05,
"loss": 3.5324,
"step": 395
},
{
"epoch": 0.04,
"learning_rate": 9.975527895364098e-05,
"loss": 3.4442,
"step": 400
},
{
"epoch": 0.04,
"learning_rate": 9.974706054932158e-05,
"loss": 3.4684,
"step": 405
},
{
"epoch": 0.04,
"learning_rate": 9.973870676618179e-05,
"loss": 3.3819,
"step": 410
},
{
"epoch": 0.04,
"learning_rate": 9.973021762695514e-05,
"loss": 3.4724,
"step": 415
},
{
"epoch": 0.04,
"learning_rate": 9.972159315474348e-05,
"loss": 3.4046,
"step": 420
},
{
"epoch": 0.04,
"learning_rate": 9.971283337301694e-05,
"loss": 3.4115,
"step": 425
},
{
"epoch": 0.04,
"learning_rate": 9.97039383056139e-05,
"loss": 3.3468,
"step": 430
},
{
"epoch": 0.05,
"learning_rate": 9.969490797674091e-05,
"loss": 3.4098,
"step": 435
},
{
"epoch": 0.05,
"learning_rate": 9.968574241097259e-05,
"loss": 3.4411,
"step": 440
},
{
"epoch": 0.05,
"learning_rate": 9.967644163325156e-05,
"loss": 3.398,
"step": 445
},
{
"epoch": 0.05,
"learning_rate": 9.966700566888847e-05,
"loss": 3.4732,
"step": 450
},
{
"epoch": 0.05,
"learning_rate": 9.965743454356178e-05,
"loss": 3.4134,
"step": 455
},
{
"epoch": 0.05,
"learning_rate": 9.964772828331781e-05,
"loss": 3.4411,
"step": 460
},
{
"epoch": 0.05,
"learning_rate": 9.963788691457066e-05,
"loss": 3.4187,
"step": 465
},
{
"epoch": 0.05,
"learning_rate": 9.962791046410202e-05,
"loss": 3.3967,
"step": 470
},
{
"epoch": 0.05,
"learning_rate": 9.961779895906127e-05,
"loss": 3.3737,
"step": 475
},
{
"epoch": 0.05,
"learning_rate": 9.960755242696528e-05,
"loss": 3.2796,
"step": 480
},
{
"epoch": 0.05,
"learning_rate": 9.959717089569837e-05,
"loss": 3.3398,
"step": 485
},
{
"epoch": 0.05,
"learning_rate": 9.958665439351224e-05,
"loss": 3.3882,
"step": 490
},
{
"epoch": 0.05,
"learning_rate": 9.957600294902593e-05,
"loss": 3.3202,
"step": 495
},
{
"epoch": 0.05,
"learning_rate": 9.956521659122567e-05,
"loss": 3.3962,
"step": 500
},
{
"epoch": 0.05,
"learning_rate": 9.95542953494648e-05,
"loss": 3.3475,
"step": 505
},
{
"epoch": 0.05,
"learning_rate": 9.954323925346381e-05,
"loss": 3.4164,
"step": 510
},
{
"epoch": 0.05,
"learning_rate": 9.953204833331011e-05,
"loss": 3.4438,
"step": 515
},
{
"epoch": 0.05,
"learning_rate": 9.952072261945804e-05,
"loss": 3.3635,
"step": 520
},
{
"epoch": 0.05,
"learning_rate": 9.950926214272876e-05,
"loss": 3.3403,
"step": 525
},
{
"epoch": 0.06,
"learning_rate": 9.949766693431015e-05,
"loss": 3.2609,
"step": 530
},
{
"epoch": 0.06,
"learning_rate": 9.948593702575675e-05,
"loss": 3.3393,
"step": 535
},
{
"epoch": 0.06,
"learning_rate": 9.947407244898967e-05,
"loss": 3.3297,
"step": 540
},
{
"epoch": 0.06,
"learning_rate": 9.946207323629648e-05,
"loss": 3.3513,
"step": 545
},
{
"epoch": 0.06,
"learning_rate": 9.944993942033119e-05,
"loss": 3.2163,
"step": 550
},
{
"epoch": 0.06,
"learning_rate": 9.943767103411403e-05,
"loss": 3.3271,
"step": 555
},
{
"epoch": 0.06,
"learning_rate": 9.942526811103152e-05,
"loss": 3.2933,
"step": 560
},
{
"epoch": 0.06,
"learning_rate": 9.941273068483625e-05,
"loss": 3.2704,
"step": 565
},
{
"epoch": 0.06,
"learning_rate": 9.940005878964689e-05,
"loss": 3.3276,
"step": 570
},
{
"epoch": 0.06,
"learning_rate": 9.938725245994798e-05,
"loss": 3.1371,
"step": 575
},
{
"epoch": 0.06,
"learning_rate": 9.937431173058996e-05,
"loss": 3.2738,
"step": 580
},
{
"epoch": 0.06,
"learning_rate": 9.9361236636789e-05,
"loss": 3.2159,
"step": 585
},
{
"epoch": 0.06,
"learning_rate": 9.934802721412686e-05,
"loss": 3.3331,
"step": 590
},
{
"epoch": 0.06,
"learning_rate": 9.933468349855099e-05,
"loss": 3.2437,
"step": 595
},
{
"epoch": 0.06,
"learning_rate": 9.932120552637418e-05,
"loss": 3.3399,
"step": 600
},
{
"epoch": 0.06,
"learning_rate": 9.930759333427459e-05,
"loss": 3.3702,
"step": 605
},
{
"epoch": 0.06,
"learning_rate": 9.929384695929572e-05,
"loss": 3.3661,
"step": 610
},
{
"epoch": 0.06,
"learning_rate": 9.927996643884614e-05,
"loss": 3.2874,
"step": 615
},
{
"epoch": 0.06,
"learning_rate": 9.92659518106995e-05,
"loss": 3.3997,
"step": 620
},
{
"epoch": 0.06,
"learning_rate": 9.925180311299446e-05,
"loss": 3.3174,
"step": 625
},
{
"epoch": 0.07,
"learning_rate": 9.923752038423443e-05,
"loss": 3.2904,
"step": 630
},
{
"epoch": 0.07,
"learning_rate": 9.922310366328763e-05,
"loss": 3.2539,
"step": 635
},
{
"epoch": 0.07,
"learning_rate": 9.920855298938691e-05,
"loss": 3.3159,
"step": 640
},
{
"epoch": 0.07,
"learning_rate": 9.919386840212964e-05,
"loss": 3.2853,
"step": 645
},
{
"epoch": 0.07,
"learning_rate": 9.917904994147764e-05,
"loss": 3.2502,
"step": 650
},
{
"epoch": 0.07,
"learning_rate": 9.916409764775699e-05,
"loss": 3.3135,
"step": 655
},
{
"epoch": 0.07,
"learning_rate": 9.914901156165804e-05,
"loss": 3.2354,
"step": 660
},
{
"epoch": 0.07,
"learning_rate": 9.91337917242352e-05,
"loss": 3.3656,
"step": 665
},
{
"epoch": 0.07,
"learning_rate": 9.911843817690686e-05,
"loss": 3.3112,
"step": 670
},
{
"epoch": 0.07,
"learning_rate": 9.91029509614553e-05,
"loss": 3.2751,
"step": 675
},
{
"epoch": 0.07,
"learning_rate": 9.908733012002655e-05,
"loss": 3.3689,
"step": 680
},
{
"epoch": 0.07,
"learning_rate": 9.907157569513027e-05,
"loss": 3.294,
"step": 685
},
{
"epoch": 0.07,
"learning_rate": 9.905568772963967e-05,
"loss": 3.2822,
"step": 690
},
{
"epoch": 0.07,
"learning_rate": 9.903966626679133e-05,
"loss": 3.1767,
"step": 695
},
{
"epoch": 0.07,
"learning_rate": 9.902351135018517e-05,
"loss": 3.1662,
"step": 700
},
{
"epoch": 0.07,
"learning_rate": 9.900722302378425e-05,
"loss": 3.265,
"step": 705
},
{
"epoch": 0.07,
"learning_rate": 9.89908013319147e-05,
"loss": 3.1844,
"step": 710
},
{
"epoch": 0.07,
"learning_rate": 9.897424631926557e-05,
"loss": 3.2678,
"step": 715
},
{
"epoch": 0.07,
"learning_rate": 9.895755803088874e-05,
"loss": 3.1701,
"step": 720
},
{
"epoch": 0.08,
"learning_rate": 9.894073651219877e-05,
"loss": 3.1247,
"step": 725
},
{
"epoch": 0.08,
"learning_rate": 9.892378180897277e-05,
"loss": 3.1464,
"step": 730
},
{
"epoch": 0.08,
"learning_rate": 9.890669396735032e-05,
"loss": 3.1766,
"step": 735
},
{
"epoch": 0.08,
"learning_rate": 9.888947303383329e-05,
"loss": 3.1608,
"step": 740
},
{
"epoch": 0.08,
"learning_rate": 9.887211905528576e-05,
"loss": 3.2586,
"step": 745
},
{
"epoch": 0.08,
"learning_rate": 9.885463207893383e-05,
"loss": 3.253,
"step": 750
},
{
"epoch": 0.08,
"learning_rate": 9.88370121523656e-05,
"loss": 3.198,
"step": 755
},
{
"epoch": 0.08,
"learning_rate": 9.881925932353091e-05,
"loss": 3.2423,
"step": 760
},
{
"epoch": 0.08,
"learning_rate": 9.880137364074132e-05,
"loss": 3.1733,
"step": 765
},
{
"epoch": 0.08,
"learning_rate": 9.878335515266989e-05,
"loss": 3.1777,
"step": 770
},
{
"epoch": 0.08,
"learning_rate": 9.876520390835112e-05,
"loss": 3.2398,
"step": 775
},
{
"epoch": 0.08,
"learning_rate": 9.874691995718077e-05,
"loss": 3.1689,
"step": 780
},
{
"epoch": 0.08,
"learning_rate": 9.872850334891575e-05,
"loss": 3.2627,
"step": 785
},
{
"epoch": 0.08,
"learning_rate": 9.870995413367397e-05,
"loss": 3.1886,
"step": 790
},
{
"epoch": 0.08,
"learning_rate": 9.869127236193419e-05,
"loss": 3.2871,
"step": 795
},
{
"epoch": 0.08,
"learning_rate": 9.867245808453594e-05,
"loss": 3.234,
"step": 800
},
{
"epoch": 0.08,
"learning_rate": 9.865351135267931e-05,
"loss": 3.1879,
"step": 805
},
{
"epoch": 0.08,
"learning_rate": 9.863443221792487e-05,
"loss": 3.1875,
"step": 810
},
{
"epoch": 0.08,
"learning_rate": 9.861522073219349e-05,
"loss": 3.2127,
"step": 815
},
{
"epoch": 0.09,
"learning_rate": 9.859587694776621e-05,
"loss": 3.1911,
"step": 820
},
{
"epoch": 0.09,
"learning_rate": 9.857640091728411e-05,
"loss": 3.134,
"step": 825
},
{
"epoch": 0.09,
"learning_rate": 9.855679269374816e-05,
"loss": 3.1748,
"step": 830
},
{
"epoch": 0.09,
"learning_rate": 9.853705233051904e-05,
"loss": 3.1883,
"step": 835
},
{
"epoch": 0.09,
"learning_rate": 9.851717988131705e-05,
"loss": 3.0893,
"step": 840
},
{
"epoch": 0.09,
"learning_rate": 9.849717540022199e-05,
"loss": 3.221,
"step": 845
},
{
"epoch": 0.09,
"learning_rate": 9.847703894167286e-05,
"loss": 3.2442,
"step": 850
},
{
"epoch": 0.09,
"learning_rate": 9.84567705604679e-05,
"loss": 3.168,
"step": 855
},
{
"epoch": 0.09,
"learning_rate": 9.843637031176433e-05,
"loss": 3.2322,
"step": 860
},
{
"epoch": 0.09,
"learning_rate": 9.841583825107821e-05,
"loss": 3.1372,
"step": 865
},
{
"epoch": 0.09,
"learning_rate": 9.839517443428434e-05,
"loss": 3.09,
"step": 870
},
{
"epoch": 0.09,
"learning_rate": 9.837437891761606e-05,
"loss": 3.0985,
"step": 875
},
{
"epoch": 0.09,
"learning_rate": 9.835345175766509e-05,
"loss": 3.1977,
"step": 880
},
{
"epoch": 0.09,
"learning_rate": 9.833239301138142e-05,
"loss": 3.2027,
"step": 885
},
{
"epoch": 0.09,
"learning_rate": 9.83112027360731e-05,
"loss": 3.1813,
"step": 890
},
{
"epoch": 0.09,
"learning_rate": 9.828988098940619e-05,
"loss": 3.2466,
"step": 895
},
{
"epoch": 0.09,
"learning_rate": 9.826842782940445e-05,
"loss": 3.2112,
"step": 900
},
{
"epoch": 0.09,
"learning_rate": 9.824684331444927e-05,
"loss": 3.2527,
"step": 905
},
{
"epoch": 0.09,
"learning_rate": 9.822512750327953e-05,
"loss": 3.15,
"step": 910
},
{
"epoch": 0.1,
"learning_rate": 9.820328045499143e-05,
"loss": 3.1752,
"step": 915
},
{
"epoch": 0.1,
"learning_rate": 9.818130222903828e-05,
"loss": 3.1903,
"step": 920
},
{
"epoch": 0.1,
"learning_rate": 9.815919288523035e-05,
"loss": 3.1915,
"step": 925
},
{
"epoch": 0.1,
"learning_rate": 9.813695248373474e-05,
"loss": 3.2435,
"step": 930
},
{
"epoch": 0.1,
"learning_rate": 9.811458108507527e-05,
"loss": 3.1334,
"step": 935
},
{
"epoch": 0.1,
"learning_rate": 9.809207875013213e-05,
"loss": 3.2241,
"step": 940
},
{
"epoch": 0.1,
"learning_rate": 9.806944554014193e-05,
"loss": 3.1198,
"step": 945
},
{
"epoch": 0.1,
"learning_rate": 9.80466815166974e-05,
"loss": 3.1322,
"step": 950
},
{
"epoch": 0.1,
"learning_rate": 9.802378674174724e-05,
"loss": 3.0204,
"step": 955
},
{
"epoch": 0.1,
"learning_rate": 9.800076127759599e-05,
"loss": 3.2674,
"step": 960
},
{
"epoch": 0.1,
"learning_rate": 9.797760518690385e-05,
"loss": 3.1515,
"step": 965
},
{
"epoch": 0.1,
"learning_rate": 9.795431853268648e-05,
"loss": 3.1364,
"step": 970
},
{
"epoch": 0.1,
"learning_rate": 9.793090137831486e-05,
"loss": 3.2124,
"step": 975
},
{
"epoch": 0.1,
"learning_rate": 9.79073537875151e-05,
"loss": 3.1288,
"step": 980
},
{
"epoch": 0.1,
"learning_rate": 9.788367582436828e-05,
"loss": 3.1042,
"step": 985
},
{
"epoch": 0.1,
"learning_rate": 9.785986755331025e-05,
"loss": 3.2284,
"step": 990
},
{
"epoch": 0.1,
"learning_rate": 9.78359290391315e-05,
"loss": 3.2302,
"step": 995
},
{
"epoch": 0.1,
"learning_rate": 9.781186034697692e-05,
"loss": 3.1145,
"step": 1000
},
{
"epoch": 0.1,
"learning_rate": 9.778766154234569e-05,
"loss": 3.1682,
"step": 1005
},
{
"epoch": 0.1,
"learning_rate": 9.776333269109107e-05,
"loss": 3.1781,
"step": 1010
},
{
"epoch": 0.11,
"learning_rate": 9.77388738594202e-05,
"loss": 3.0873,
"step": 1015
},
{
"epoch": 0.11,
"learning_rate": 9.771428511389395e-05,
"loss": 3.1021,
"step": 1020
},
{
"epoch": 0.11,
"learning_rate": 9.768956652142674e-05,
"loss": 3.0975,
"step": 1025
},
{
"epoch": 0.11,
"learning_rate": 9.766471814928635e-05,
"loss": 3.1241,
"step": 1030
},
{
"epoch": 0.11,
"learning_rate": 9.76397400650937e-05,
"loss": 3.0822,
"step": 1035
},
{
"epoch": 0.11,
"learning_rate": 9.761463233682276e-05,
"loss": 3.1236,
"step": 1040
},
{
"epoch": 0.11,
"learning_rate": 9.758939503280024e-05,
"loss": 3.1732,
"step": 1045
},
{
"epoch": 0.11,
"learning_rate": 9.756402822170553e-05,
"loss": 3.1124,
"step": 1050
},
{
"epoch": 0.11,
"learning_rate": 9.753853197257042e-05,
"loss": 3.274,
"step": 1055
},
{
"epoch": 0.11,
"learning_rate": 9.751290635477895e-05,
"loss": 3.1269,
"step": 1060
},
{
"epoch": 0.11,
"learning_rate": 9.748715143806719e-05,
"loss": 3.1343,
"step": 1065
},
{
"epoch": 0.11,
"learning_rate": 9.746126729252316e-05,
"loss": 3.1461,
"step": 1070
},
{
"epoch": 0.11,
"learning_rate": 9.743525398858646e-05,
"loss": 3.1007,
"step": 1075
},
{
"epoch": 0.11,
"learning_rate": 9.740911159704824e-05,
"loss": 3.2817,
"step": 1080
},
{
"epoch": 0.11,
"learning_rate": 9.738284018905091e-05,
"loss": 3.1324,
"step": 1085
},
{
"epoch": 0.11,
"learning_rate": 9.735643983608797e-05,
"loss": 3.0239,
"step": 1090
},
{
"epoch": 0.11,
"learning_rate": 9.732991061000385e-05,
"loss": 3.1179,
"step": 1095
},
{
"epoch": 0.11,
"learning_rate": 9.730325258299366e-05,
"loss": 3.0433,
"step": 1100
},
{
"epoch": 0.11,
"learning_rate": 9.727646582760308e-05,
"loss": 3.1044,
"step": 1105
},
{
"epoch": 0.12,
"learning_rate": 9.724955041672803e-05,
"loss": 3.0011,
"step": 1110
},
{
"epoch": 0.12,
"learning_rate": 9.722250642361456e-05,
"loss": 3.0646,
"step": 1115
},
{
"epoch": 0.12,
"learning_rate": 9.719533392185872e-05,
"loss": 3.1282,
"step": 1120
},
{
"epoch": 0.12,
"learning_rate": 9.716803298540613e-05,
"loss": 3.1745,
"step": 1125
},
{
"epoch": 0.12,
"learning_rate": 9.714060368855206e-05,
"loss": 3.1198,
"step": 1130
},
{
"epoch": 0.12,
"learning_rate": 9.711304610594104e-05,
"loss": 3.0571,
"step": 1135
},
{
"epoch": 0.12,
"learning_rate": 9.708536031256666e-05,
"loss": 3.1822,
"step": 1140
},
{
"epoch": 0.12,
"learning_rate": 9.705754638377151e-05,
"loss": 3.129,
"step": 1145
},
{
"epoch": 0.12,
"learning_rate": 9.70296043952468e-05,
"loss": 3.124,
"step": 1150
},
{
"epoch": 0.12,
"learning_rate": 9.70015344230323e-05,
"loss": 3.1571,
"step": 1155
},
{
"epoch": 0.12,
"learning_rate": 9.697333654351602e-05,
"loss": 3.1151,
"step": 1160
},
{
"epoch": 0.12,
"learning_rate": 9.694501083343405e-05,
"loss": 3.0869,
"step": 1165
},
{
"epoch": 0.12,
"learning_rate": 9.691655736987038e-05,
"loss": 3.1241,
"step": 1170
},
{
"epoch": 0.12,
"learning_rate": 9.688797623025666e-05,
"loss": 3.0611,
"step": 1175
},
{
"epoch": 0.12,
"learning_rate": 9.685926749237195e-05,
"loss": 3.0933,
"step": 1180
},
{
"epoch": 0.12,
"learning_rate": 9.683043123434261e-05,
"loss": 3.1017,
"step": 1185
},
{
"epoch": 0.12,
"learning_rate": 9.680146753464197e-05,
"loss": 3.1141,
"step": 1190
},
{
"epoch": 0.12,
"learning_rate": 9.677237647209017e-05,
"loss": 3.0519,
"step": 1195
},
{
"epoch": 0.12,
"learning_rate": 9.674315812585402e-05,
"loss": 3.0872,
"step": 1200
},
{
"epoch": 0.13,
"learning_rate": 9.671381257544665e-05,
"loss": 3.1233,
"step": 1205
},
{
"epoch": 0.13,
"learning_rate": 9.668433990072737e-05,
"loss": 3.0345,
"step": 1210
},
{
"epoch": 0.13,
"learning_rate": 9.665474018190142e-05,
"loss": 3.0633,
"step": 1215
},
{
"epoch": 0.13,
"learning_rate": 9.662501349951981e-05,
"loss": 2.9734,
"step": 1220
},
{
"epoch": 0.13,
"learning_rate": 9.659515993447904e-05,
"loss": 3.0144,
"step": 1225
},
{
"epoch": 0.13,
"learning_rate": 9.656517956802092e-05,
"loss": 3.0022,
"step": 1230
},
{
"epoch": 0.13,
"learning_rate": 9.653507248173227e-05,
"loss": 3.1476,
"step": 1235
},
{
"epoch": 0.13,
"learning_rate": 9.650483875754483e-05,
"loss": 3.035,
"step": 1240
},
{
"epoch": 0.13,
"learning_rate": 9.647447847773497e-05,
"loss": 3.1612,
"step": 1245
},
{
"epoch": 0.13,
"learning_rate": 9.644399172492336e-05,
"loss": 3.0087,
"step": 1250
},
{
"epoch": 0.13,
"learning_rate": 9.641337858207495e-05,
"loss": 3.0989,
"step": 1255
},
{
"epoch": 0.13,
"learning_rate": 9.638263913249861e-05,
"loss": 3.0553,
"step": 1260
},
{
"epoch": 0.13,
"learning_rate": 9.635177345984692e-05,
"loss": 3.1579,
"step": 1265
},
{
"epoch": 0.13,
"learning_rate": 9.632078164811598e-05,
"loss": 2.9481,
"step": 1270
},
{
"epoch": 0.13,
"learning_rate": 9.628966378164513e-05,
"loss": 2.9853,
"step": 1275
},
{
"epoch": 0.13,
"learning_rate": 9.625841994511675e-05,
"loss": 3.1718,
"step": 1280
},
{
"epoch": 0.13,
"learning_rate": 9.622705022355607e-05,
"loss": 3.1015,
"step": 1285
},
{
"epoch": 0.13,
"learning_rate": 9.619555470233083e-05,
"loss": 2.9687,
"step": 1290
},
{
"epoch": 0.13,
"learning_rate": 9.616393346715119e-05,
"loss": 3.1194,
"step": 1295
},
{
"epoch": 0.14,
"learning_rate": 9.613218660406933e-05,
"loss": 3.0765,
"step": 1300
},
{
"epoch": 0.14,
"learning_rate": 9.61003141994794e-05,
"loss": 2.957,
"step": 1305
},
{
"epoch": 0.14,
"learning_rate": 9.606831634011712e-05,
"loss": 3.0512,
"step": 1310
},
{
"epoch": 0.14,
"learning_rate": 9.603619311305968e-05,
"loss": 3.0628,
"step": 1315
},
{
"epoch": 0.14,
"learning_rate": 9.600394460572537e-05,
"loss": 3.0169,
"step": 1320
},
{
"epoch": 0.14,
"learning_rate": 9.597157090587346e-05,
"loss": 2.9306,
"step": 1325
},
{
"epoch": 0.14,
"learning_rate": 9.593907210160387e-05,
"loss": 3.1524,
"step": 1330
},
{
"epoch": 0.14,
"learning_rate": 9.590644828135702e-05,
"loss": 3.0448,
"step": 1335
},
{
"epoch": 0.14,
"learning_rate": 9.587369953391352e-05,
"loss": 3.056,
"step": 1340
},
{
"epoch": 0.14,
"learning_rate": 9.584082594839394e-05,
"loss": 3.0549,
"step": 1345
},
{
"epoch": 0.14,
"learning_rate": 9.580782761425859e-05,
"loss": 3.0811,
"step": 1350
},
{
"epoch": 0.14,
"learning_rate": 9.577470462130726e-05,
"loss": 2.978,
"step": 1355
},
{
"epoch": 0.14,
"learning_rate": 9.574145705967898e-05,
"loss": 3.0529,
"step": 1360
},
{
"epoch": 0.14,
"learning_rate": 9.570808501985175e-05,
"loss": 2.9169,
"step": 1365
},
{
"epoch": 0.14,
"learning_rate": 9.567458859264238e-05,
"loss": 3.0417,
"step": 1370
},
{
"epoch": 0.14,
"learning_rate": 9.56409678692061e-05,
"loss": 2.9647,
"step": 1375
},
{
"epoch": 0.14,
"learning_rate": 9.560722294103646e-05,
"loss": 3.0852,
"step": 1380
},
{
"epoch": 0.14,
"learning_rate": 9.557335389996499e-05,
"loss": 2.9684,
"step": 1385
},
{
"epoch": 0.14,
"learning_rate": 9.553936083816095e-05,
"loss": 3.0088,
"step": 1390
},
{
"epoch": 0.14,
"learning_rate": 9.550524384813114e-05,
"loss": 3.1141,
"step": 1395
},
{
"epoch": 0.15,
"learning_rate": 9.54710030227196e-05,
"loss": 3.0406,
"step": 1400
},
{
"epoch": 0.15,
"learning_rate": 9.543663845510736e-05,
"loss": 3.049,
"step": 1405
},
{
"epoch": 0.15,
"learning_rate": 9.540215023881219e-05,
"loss": 3.1169,
"step": 1410
},
{
"epoch": 0.15,
"learning_rate": 9.536753846768835e-05,
"loss": 2.9199,
"step": 1415
},
{
"epoch": 0.15,
"learning_rate": 9.53328032359264e-05,
"loss": 2.9955,
"step": 1420
},
{
"epoch": 0.15,
"learning_rate": 9.529794463805275e-05,
"loss": 3.0653,
"step": 1425
},
{
"epoch": 0.15,
"learning_rate": 9.526296276892965e-05,
"loss": 3.185,
"step": 1430
},
{
"epoch": 0.15,
"learning_rate": 9.522785772375475e-05,
"loss": 2.9872,
"step": 1435
},
{
"epoch": 0.15,
"learning_rate": 9.519262959806095e-05,
"loss": 3.0537,
"step": 1440
},
{
"epoch": 0.15,
"learning_rate": 9.515727848771605e-05,
"loss": 2.9043,
"step": 1445
},
{
"epoch": 0.15,
"learning_rate": 9.512180448892254e-05,
"loss": 2.9015,
"step": 1450
},
{
"epoch": 0.15,
"learning_rate": 9.508620769821734e-05,
"loss": 3.0544,
"step": 1455
},
{
"epoch": 0.15,
"learning_rate": 9.505048821247156e-05,
"loss": 2.983,
"step": 1460
},
{
"epoch": 0.15,
"learning_rate": 9.501464612889015e-05,
"loss": 3.003,
"step": 1465
},
{
"epoch": 0.15,
"learning_rate": 9.497868154501172e-05,
"loss": 3.0052,
"step": 1470
},
{
"epoch": 0.15,
"learning_rate": 9.494259455870824e-05,
"loss": 2.9862,
"step": 1475
},
{
"epoch": 0.15,
"learning_rate": 9.49063852681848e-05,
"loss": 3.1277,
"step": 1480
},
{
"epoch": 0.15,
"learning_rate": 9.48700537719793e-05,
"loss": 2.9797,
"step": 1485
},
{
"epoch": 0.15,
"learning_rate": 9.48336001689622e-05,
"loss": 2.869,
"step": 1490
},
{
"epoch": 0.16,
"learning_rate": 9.479702455833626e-05,
"loss": 3.0126,
"step": 1495
},
{
"epoch": 0.16,
"learning_rate": 9.476032703963625e-05,
"loss": 3.0621,
"step": 1500
},
{
"epoch": 0.16,
"learning_rate": 9.472350771272875e-05,
"loss": 3.076,
"step": 1505
},
{
"epoch": 0.16,
"learning_rate": 9.468656667781174e-05,
"loss": 3.0326,
"step": 1510
},
{
"epoch": 0.16,
"learning_rate": 9.464950403541447e-05,
"loss": 3.0655,
"step": 1515
},
{
"epoch": 0.16,
"learning_rate": 9.461231988639711e-05,
"loss": 3.0635,
"step": 1520
},
{
"epoch": 0.16,
"learning_rate": 9.457501433195048e-05,
"loss": 3.1336,
"step": 1525
},
{
"epoch": 0.16,
"learning_rate": 9.45375874735958e-05,
"loss": 3.0775,
"step": 1530
},
{
"epoch": 0.16,
"learning_rate": 9.450003941318438e-05,
"loss": 3.0088,
"step": 1535
},
{
"epoch": 0.16,
"learning_rate": 9.44623702528974e-05,
"loss": 3.1038,
"step": 1540
},
{
"epoch": 0.16,
"learning_rate": 9.442458009524554e-05,
"loss": 3.0736,
"step": 1545
},
{
"epoch": 0.16,
"learning_rate": 9.43866690430688e-05,
"loss": 2.9574,
"step": 1550
},
{
"epoch": 0.16,
"learning_rate": 9.434863719953618e-05,
"loss": 2.9601,
"step": 1555
},
{
"epoch": 0.16,
"learning_rate": 9.431048466814534e-05,
"loss": 3.0349,
"step": 1560
},
{
"epoch": 0.16,
"learning_rate": 9.427221155272241e-05,
"loss": 3.0865,
"step": 1565
},
{
"epoch": 0.16,
"learning_rate": 9.42338179574217e-05,
"loss": 3.0631,
"step": 1570
},
{
"epoch": 0.16,
"learning_rate": 9.419530398672533e-05,
"loss": 2.9913,
"step": 1575
},
{
"epoch": 0.16,
"learning_rate": 9.415666974544305e-05,
"loss": 3.0652,
"step": 1580
},
{
"epoch": 0.16,
"learning_rate": 9.411791533871186e-05,
"loss": 2.9632,
"step": 1585
},
{
"epoch": 0.17,
"learning_rate": 9.407904087199582e-05,
"loss": 3.0972,
"step": 1590
},
{
"epoch": 0.17,
"learning_rate": 9.404004645108568e-05,
"loss": 3.0728,
"step": 1595
},
{
"epoch": 0.17,
"learning_rate": 9.400093218209865e-05,
"loss": 2.9743,
"step": 1600
},
{
"epoch": 0.17,
"learning_rate": 9.396169817147805e-05,
"loss": 2.9296,
"step": 1605
},
{
"epoch": 0.17,
"learning_rate": 9.392234452599312e-05,
"loss": 3.004,
"step": 1610
},
{
"epoch": 0.17,
"learning_rate": 9.388287135273861e-05,
"loss": 3.0129,
"step": 1615
},
{
"epoch": 0.17,
"learning_rate": 9.384327875913457e-05,
"loss": 2.9088,
"step": 1620
},
{
"epoch": 0.17,
"learning_rate": 9.380356685292603e-05,
"loss": 2.9814,
"step": 1625
},
{
"epoch": 0.17,
"learning_rate": 9.376373574218272e-05,
"loss": 3.0918,
"step": 1630
},
{
"epoch": 0.17,
"learning_rate": 9.372378553529875e-05,
"loss": 2.9426,
"step": 1635
},
{
"epoch": 0.17,
"learning_rate": 9.368371634099236e-05,
"loss": 2.9851,
"step": 1640
},
{
"epoch": 0.17,
"learning_rate": 9.364352826830555e-05,
"loss": 2.9476,
"step": 1645
},
{
"epoch": 0.17,
"learning_rate": 9.360322142660388e-05,
"loss": 3.067,
"step": 1650
},
{
"epoch": 0.17,
"learning_rate": 9.356279592557607e-05,
"loss": 3.0532,
"step": 1655
},
{
"epoch": 0.17,
"learning_rate": 9.352225187523378e-05,
"loss": 3.1064,
"step": 1660
},
{
"epoch": 0.17,
"learning_rate": 9.34815893859113e-05,
"loss": 2.9766,
"step": 1665
},
{
"epoch": 0.17,
"learning_rate": 9.344080856826521e-05,
"loss": 3.0918,
"step": 1670
},
{
"epoch": 0.17,
"learning_rate": 9.339990953327411e-05,
"loss": 2.9851,
"step": 1675
},
{
"epoch": 0.17,
"learning_rate": 9.33588923922383e-05,
"loss": 3.0343,
"step": 1680
},
{
"epoch": 0.18,
"learning_rate": 9.331775725677949e-05,
"loss": 3.0074,
"step": 1685
},
{
"epoch": 0.18,
"learning_rate": 9.327650423884051e-05,
"loss": 2.9863,
"step": 1690
},
{
"epoch": 0.18,
"learning_rate": 9.323513345068496e-05,
"loss": 3.0563,
"step": 1695
},
{
"epoch": 0.18,
"learning_rate": 9.319364500489696e-05,
"loss": 3.1017,
"step": 1700
},
{
"epoch": 0.18,
"learning_rate": 9.315203901438082e-05,
"loss": 3.0385,
"step": 1705
},
{
"epoch": 0.18,
"learning_rate": 9.311031559236067e-05,
"loss": 3.0245,
"step": 1710
},
{
"epoch": 0.18,
"learning_rate": 9.30684748523803e-05,
"loss": 3.0084,
"step": 1715
},
{
"epoch": 0.18,
"learning_rate": 9.302651690830271e-05,
"loss": 2.9123,
"step": 1720
},
{
"epoch": 0.18,
"learning_rate": 9.298444187430986e-05,
"loss": 3.0955,
"step": 1725
},
{
"epoch": 0.18,
"learning_rate": 9.294224986490236e-05,
"loss": 3.0408,
"step": 1730
},
{
"epoch": 0.18,
"learning_rate": 9.289994099489913e-05,
"loss": 2.9581,
"step": 1735
},
{
"epoch": 0.18,
"learning_rate": 9.285751537943712e-05,
"loss": 3.0821,
"step": 1740
},
{
"epoch": 0.18,
"learning_rate": 9.281497313397101e-05,
"loss": 2.9833,
"step": 1745
},
{
"epoch": 0.18,
"learning_rate": 9.27723143742728e-05,
"loss": 2.9601,
"step": 1750
},
{
"epoch": 0.18,
"learning_rate": 9.272953921643168e-05,
"loss": 2.9825,
"step": 1755
},
{
"epoch": 0.18,
"learning_rate": 9.268664777685345e-05,
"loss": 2.9493,
"step": 1760
},
{
"epoch": 0.18,
"learning_rate": 9.264364017226051e-05,
"loss": 2.8516,
"step": 1765
},
{
"epoch": 0.18,
"learning_rate": 9.260051651969128e-05,
"loss": 2.9573,
"step": 1770
},
{
"epoch": 0.18,
"learning_rate": 9.255727693649998e-05,
"loss": 3.0197,
"step": 1775
},
{
"epoch": 0.18,
"learning_rate": 9.251392154035641e-05,
"loss": 3.0124,
"step": 1780
},
{
"epoch": 0.19,
"learning_rate": 9.247045044924542e-05,
"loss": 3.0337,
"step": 1785
},
{
"epoch": 0.19,
"learning_rate": 9.242686378146682e-05,
"loss": 2.9469,
"step": 1790
},
{
"epoch": 0.19,
"learning_rate": 9.238316165563485e-05,
"loss": 3.0674,
"step": 1795
},
{
"epoch": 0.19,
"learning_rate": 9.233934419067802e-05,
"loss": 2.9407,
"step": 1800
},
{
"epoch": 0.19,
"learning_rate": 9.229541150583865e-05,
"loss": 3.0143,
"step": 1805
},
{
"epoch": 0.19,
"learning_rate": 9.225136372067266e-05,
"loss": 2.9884,
"step": 1810
},
{
"epoch": 0.19,
"learning_rate": 9.22072009550492e-05,
"loss": 3.0212,
"step": 1815
},
{
"epoch": 0.19,
"learning_rate": 9.216292332915031e-05,
"loss": 3.0117,
"step": 1820
},
{
"epoch": 0.19,
"learning_rate": 9.211853096347058e-05,
"loss": 3.0236,
"step": 1825
},
{
"epoch": 0.19,
"learning_rate": 9.20740239788169e-05,
"loss": 2.9824,
"step": 1830
},
{
"epoch": 0.19,
"learning_rate": 9.202940249630801e-05,
"loss": 2.8586,
"step": 1835
},
{
"epoch": 0.19,
"learning_rate": 9.198466663737431e-05,
"loss": 2.9688,
"step": 1840
},
{
"epoch": 0.19,
"learning_rate": 9.193981652375738e-05,
"loss": 2.9999,
"step": 1845
},
{
"epoch": 0.19,
"learning_rate": 9.189485227750981e-05,
"loss": 2.9553,
"step": 1850
},
{
"epoch": 0.19,
"learning_rate": 9.18497740209947e-05,
"loss": 2.998,
"step": 1855
},
{
"epoch": 0.19,
"learning_rate": 9.180458187688549e-05,
"loss": 2.897,
"step": 1860
},
{
"epoch": 0.19,
"learning_rate": 9.175927596816549e-05,
"loss": 3.0075,
"step": 1865
},
{
"epoch": 0.19,
"learning_rate": 9.171385641812763e-05,
"loss": 2.9874,
"step": 1870
},
{
"epoch": 0.19,
"learning_rate": 9.166832335037408e-05,
"loss": 2.8981,
"step": 1875
},
{
"epoch": 0.2,
"learning_rate": 9.162267688881596e-05,
"loss": 2.9879,
"step": 1880
},
{
"epoch": 0.2,
"learning_rate": 9.157691715767293e-05,
"loss": 3.049,
"step": 1885
},
{
"epoch": 0.2,
"learning_rate": 9.153104428147294e-05,
"loss": 2.9897,
"step": 1890
},
{
"epoch": 0.2,
"learning_rate": 9.148505838505181e-05,
"loss": 3.0117,
"step": 1895
},
{
"epoch": 0.2,
"learning_rate": 9.143895959355293e-05,
"loss": 2.9164,
"step": 1900
},
{
"epoch": 0.2,
"learning_rate": 9.139274803242697e-05,
"loss": 3.0124,
"step": 1905
},
{
"epoch": 0.2,
"learning_rate": 9.13464238274314e-05,
"loss": 2.9396,
"step": 1910
},
{
"epoch": 0.2,
"learning_rate": 9.12999871046303e-05,
"loss": 3.0676,
"step": 1915
},
{
"epoch": 0.2,
"learning_rate": 9.125343799039394e-05,
"loss": 2.8926,
"step": 1920
},
{
"epoch": 0.2,
"learning_rate": 9.120677661139839e-05,
"loss": 2.9644,
"step": 1925
},
{
"epoch": 0.2,
"learning_rate": 9.11600030946253e-05,
"loss": 3.0003,
"step": 1930
},
{
"epoch": 0.2,
"learning_rate": 9.111311756736145e-05,
"loss": 2.9588,
"step": 1935
},
{
"epoch": 0.2,
"learning_rate": 9.106612015719845e-05,
"loss": 2.9926,
"step": 1940
},
{
"epoch": 0.2,
"learning_rate": 9.101901099203239e-05,
"loss": 2.984,
"step": 1945
},
{
"epoch": 0.2,
"learning_rate": 9.097179020006343e-05,
"loss": 3.0174,
"step": 1950
},
{
"epoch": 0.2,
"learning_rate": 9.09244579097956e-05,
"loss": 2.8545,
"step": 1955
},
{
"epoch": 0.2,
"learning_rate": 9.087701425003627e-05,
"loss": 2.9614,
"step": 1960
},
{
"epoch": 0.2,
"learning_rate": 9.082945934989591e-05,
"loss": 2.87,
"step": 1965
},
{
"epoch": 0.2,
"learning_rate": 9.078179333878774e-05,
"loss": 2.9313,
"step": 1970
},
{
"epoch": 0.21,
"learning_rate": 9.073401634642733e-05,
"loss": 2.9791,
"step": 1975
},
{
"epoch": 0.21,
"learning_rate": 9.068612850283227e-05,
"loss": 2.9854,
"step": 1980
},
{
"epoch": 0.21,
"learning_rate": 9.063812993832179e-05,
"loss": 2.9578,
"step": 1985
},
{
"epoch": 0.21,
"learning_rate": 9.059002078351648e-05,
"loss": 2.8883,
"step": 1990
},
{
"epoch": 0.21,
"learning_rate": 9.054180116933783e-05,
"loss": 2.9564,
"step": 1995
},
{
"epoch": 0.21,
"learning_rate": 9.049347122700799e-05,
"loss": 2.8903,
"step": 2000
},
{
"epoch": 0.21,
"learning_rate": 9.04450310880493e-05,
"loss": 2.979,
"step": 2005
},
{
"epoch": 0.21,
"learning_rate": 9.039648088428397e-05,
"loss": 2.9795,
"step": 2010
},
{
"epoch": 0.21,
"learning_rate": 9.034782074783379e-05,
"loss": 2.9617,
"step": 2015
},
{
"epoch": 0.21,
"learning_rate": 9.02990508111197e-05,
"loss": 2.9152,
"step": 2020
},
{
"epoch": 0.21,
"learning_rate": 9.025017120686142e-05,
"loss": 2.9112,
"step": 2025
},
{
"epoch": 0.21,
"learning_rate": 9.02011820680771e-05,
"loss": 2.8487,
"step": 2030
},
{
"epoch": 0.21,
"learning_rate": 9.015208352808301e-05,
"loss": 3.0114,
"step": 2035
},
{
"epoch": 0.21,
"learning_rate": 9.010287572049314e-05,
"loss": 2.9559,
"step": 2040
},
{
"epoch": 0.21,
"learning_rate": 9.00535587792188e-05,
"loss": 3.0355,
"step": 2045
},
{
"epoch": 0.21,
"learning_rate": 9.000413283846831e-05,
"loss": 2.9393,
"step": 2050
},
{
"epoch": 0.21,
"learning_rate": 8.995459803274664e-05,
"loss": 3.0242,
"step": 2055
},
{
"epoch": 0.21,
"learning_rate": 8.990495449685494e-05,
"loss": 2.959,
"step": 2060
},
{
"epoch": 0.21,
"learning_rate": 8.985520236589036e-05,
"loss": 3.0212,
"step": 2065
},
{
"epoch": 0.22,
"learning_rate": 8.980534177524547e-05,
"loss": 2.9202,
"step": 2070
},
{
"epoch": 0.22,
"learning_rate": 8.97553728606081e-05,
"loss": 2.8971,
"step": 2075
},
{
"epoch": 0.22,
"learning_rate": 8.970529575796073e-05,
"loss": 2.9288,
"step": 2080
},
{
"epoch": 0.22,
"learning_rate": 8.965511060358043e-05,
"loss": 2.9017,
"step": 2085
},
{
"epoch": 0.22,
"learning_rate": 8.960481753403815e-05,
"loss": 2.8285,
"step": 2090
},
{
"epoch": 0.22,
"learning_rate": 8.955441668619864e-05,
"loss": 2.9122,
"step": 2095
},
{
"epoch": 0.22,
"learning_rate": 8.950390819721986e-05,
"loss": 2.9906,
"step": 2100
},
{
"epoch": 0.22,
"learning_rate": 8.945329220455279e-05,
"loss": 2.856,
"step": 2105
},
{
"epoch": 0.22,
"learning_rate": 8.940256884594085e-05,
"loss": 2.955,
"step": 2110
},
{
"epoch": 0.22,
"learning_rate": 8.935173825941973e-05,
"loss": 3.0154,
"step": 2115
},
{
"epoch": 0.22,
"learning_rate": 8.930080058331691e-05,
"loss": 3.0212,
"step": 2120
},
{
"epoch": 0.22,
"learning_rate": 8.924975595625128e-05,
"loss": 2.9544,
"step": 2125
},
{
"epoch": 0.22,
"learning_rate": 8.919860451713277e-05,
"loss": 3.0194,
"step": 2130
},
{
"epoch": 0.22,
"learning_rate": 8.914734640516201e-05,
"loss": 2.9159,
"step": 2135
},
{
"epoch": 0.22,
"learning_rate": 8.909598175982989e-05,
"loss": 2.9733,
"step": 2140
},
{
"epoch": 0.22,
"learning_rate": 8.904451072091724e-05,
"loss": 2.8918,
"step": 2145
},
{
"epoch": 0.22,
"learning_rate": 8.899293342849444e-05,
"loss": 2.8608,
"step": 2150
},
{
"epoch": 0.22,
"learning_rate": 8.894125002292097e-05,
"loss": 2.9483,
"step": 2155
},
{
"epoch": 0.22,
"learning_rate": 8.888946064484514e-05,
"loss": 2.9175,
"step": 2160
},
{
"epoch": 0.22,
"learning_rate": 8.883756543520359e-05,
"loss": 2.895,
"step": 2165
},
{
"epoch": 0.23,
"learning_rate": 8.8785564535221e-05,
"loss": 2.9723,
"step": 2170
},
{
"epoch": 0.23,
"learning_rate": 8.873345808640964e-05,
"loss": 2.8408,
"step": 2175
},
{
"epoch": 0.23,
"learning_rate": 8.86812462305691e-05,
"loss": 2.8632,
"step": 2180
},
{
"epoch": 0.23,
"learning_rate": 8.862892910978568e-05,
"loss": 2.9123,
"step": 2185
},
{
"epoch": 0.23,
"learning_rate": 8.857650686643226e-05,
"loss": 2.9912,
"step": 2190
},
{
"epoch": 0.23,
"learning_rate": 8.852397964316773e-05,
"loss": 2.9239,
"step": 2195
},
{
"epoch": 0.23,
"learning_rate": 8.847134758293669e-05,
"loss": 2.8868,
"step": 2200
},
{
"epoch": 0.23,
"learning_rate": 8.841861082896905e-05,
"loss": 3.0361,
"step": 2205
},
{
"epoch": 0.23,
"learning_rate": 8.836576952477959e-05,
"loss": 2.9736,
"step": 2210
},
{
"epoch": 0.23,
"learning_rate": 8.831282381416764e-05,
"loss": 2.8319,
"step": 2215
},
{
"epoch": 0.23,
"learning_rate": 8.825977384121666e-05,
"loss": 2.9304,
"step": 2220
},
{
"epoch": 0.23,
"learning_rate": 8.820661975029381e-05,
"loss": 2.9279,
"step": 2225
},
{
"epoch": 0.23,
"learning_rate": 8.81533616860496e-05,
"loss": 2.7954,
"step": 2230
},
{
"epoch": 0.23,
"learning_rate": 8.80999997934175e-05,
"loss": 2.9533,
"step": 2235
},
{
"epoch": 0.23,
"learning_rate": 8.804653421761355e-05,
"loss": 2.9548,
"step": 2240
},
{
"epoch": 0.23,
"learning_rate": 8.799296510413592e-05,
"loss": 2.9468,
"step": 2245
},
{
"epoch": 0.23,
"learning_rate": 8.793929259876453e-05,
"loss": 2.9835,
"step": 2250
},
{
"epoch": 0.23,
"learning_rate": 8.78855168475607e-05,
"loss": 2.8785,
"step": 2255
},
{
"epoch": 0.23,
"learning_rate": 8.783163799686669e-05,
"loss": 2.9608,
"step": 2260
},
{
"epoch": 0.24,
"learning_rate": 8.777765619330534e-05,
"loss": 2.9345,
"step": 2265
},
{
"epoch": 0.24,
"learning_rate": 8.772357158377966e-05,
"loss": 2.9231,
"step": 2270
},
{
"epoch": 0.24,
"learning_rate": 8.766938431547242e-05,
"loss": 2.9217,
"step": 2275
},
{
"epoch": 0.24,
"learning_rate": 8.761509453584579e-05,
"loss": 2.9481,
"step": 2280
},
{
"epoch": 0.24,
"learning_rate": 8.75607023926409e-05,
"loss": 2.9546,
"step": 2285
},
{
"epoch": 0.24,
"learning_rate": 8.75062080338774e-05,
"loss": 2.8486,
"step": 2290
},
{
"epoch": 0.24,
"learning_rate": 8.745161160785312e-05,
"loss": 2.9985,
"step": 2295
},
{
"epoch": 0.24,
"learning_rate": 8.739691326314373e-05,
"loss": 2.9397,
"step": 2300
},
{
"epoch": 0.24,
"learning_rate": 8.734211314860215e-05,
"loss": 2.9366,
"step": 2305
},
{
"epoch": 0.24,
"learning_rate": 8.728721141335829e-05,
"loss": 2.8284,
"step": 2310
},
{
"epoch": 0.24,
"learning_rate": 8.723220820681863e-05,
"loss": 2.8884,
"step": 2315
},
{
"epoch": 0.24,
"learning_rate": 8.717710367866579e-05,
"loss": 2.8313,
"step": 2320
},
{
"epoch": 0.24,
"learning_rate": 8.712189797885804e-05,
"loss": 3.0081,
"step": 2325
},
{
"epoch": 0.24,
"learning_rate": 8.706659125762905e-05,
"loss": 2.8746,
"step": 2330
},
{
"epoch": 0.24,
"learning_rate": 8.701118366548741e-05,
"loss": 2.9563,
"step": 2335
},
{
"epoch": 0.24,
"learning_rate": 8.695567535321616e-05,
"loss": 2.9495,
"step": 2340
},
{
"epoch": 0.24,
"learning_rate": 8.690006647187249e-05,
"loss": 2.8903,
"step": 2345
},
{
"epoch": 0.24,
"learning_rate": 8.684435717278723e-05,
"loss": 2.8455,
"step": 2350
},
{
"epoch": 0.24,
"learning_rate": 8.67885476075645e-05,
"loss": 3.0095,
"step": 2355
},
{
"epoch": 0.25,
"learning_rate": 8.673263792808126e-05,
"loss": 2.9536,
"step": 2360
},
{
"epoch": 0.25,
"learning_rate": 8.667662828648695e-05,
"loss": 2.8477,
"step": 2365
},
{
"epoch": 0.25,
"learning_rate": 8.662051883520304e-05,
"loss": 2.8341,
"step": 2370
},
{
"epoch": 0.25,
"learning_rate": 8.656430972692254e-05,
"loss": 2.8837,
"step": 2375
},
{
"epoch": 0.25,
"learning_rate": 8.650800111460978e-05,
"loss": 2.8969,
"step": 2380
},
{
"epoch": 0.25,
"learning_rate": 8.645159315149978e-05,
"loss": 2.8371,
"step": 2385
},
{
"epoch": 0.25,
"learning_rate": 8.639508599109798e-05,
"loss": 2.882,
"step": 2390
},
{
"epoch": 0.25,
"learning_rate": 8.633847978717972e-05,
"loss": 2.8732,
"step": 2395
},
{
"epoch": 0.25,
"learning_rate": 8.628177469378995e-05,
"loss": 2.9184,
"step": 2400
},
{
"epoch": 0.25,
"learning_rate": 8.622497086524266e-05,
"loss": 2.8852,
"step": 2405
},
{
"epoch": 0.25,
"learning_rate": 8.616806845612055e-05,
"loss": 2.946,
"step": 2410
},
{
"epoch": 0.25,
"learning_rate": 8.61110676212746e-05,
"loss": 2.9721,
"step": 2415
},
{
"epoch": 0.25,
"learning_rate": 8.605396851582366e-05,
"loss": 2.9052,
"step": 2420
},
{
"epoch": 0.25,
"learning_rate": 8.599677129515397e-05,
"loss": 2.9323,
"step": 2425
},
{
"epoch": 0.25,
"learning_rate": 8.593947611491879e-05,
"loss": 2.9266,
"step": 2430
},
{
"epoch": 0.25,
"learning_rate": 8.588208313103798e-05,
"loss": 3.0087,
"step": 2435
},
{
"epoch": 0.25,
"learning_rate": 8.582459249969752e-05,
"loss": 2.9049,
"step": 2440
},
{
"epoch": 0.25,
"learning_rate": 8.576700437734916e-05,
"loss": 2.9007,
"step": 2445
},
{
"epoch": 0.25,
"learning_rate": 8.570931892070992e-05,
"loss": 2.9514,
"step": 2450
},
{
"epoch": 0.26,
"learning_rate": 8.565153628676175e-05,
"loss": 2.8754,
"step": 2455
},
{
"epoch": 0.26,
"learning_rate": 8.559365663275101e-05,
"loss": 2.8701,
"step": 2460
},
{
"epoch": 0.26,
"learning_rate": 8.553568011618809e-05,
"loss": 2.8955,
"step": 2465
},
{
"epoch": 0.26,
"learning_rate": 8.547760689484698e-05,
"loss": 2.9266,
"step": 2470
},
{
"epoch": 0.26,
"learning_rate": 8.541943712676484e-05,
"loss": 2.9582,
"step": 2475
},
{
"epoch": 0.26,
"learning_rate": 8.536117097024158e-05,
"loss": 2.9981,
"step": 2480
},
{
"epoch": 0.26,
"learning_rate": 8.530280858383942e-05,
"loss": 2.9132,
"step": 2485
},
{
"epoch": 0.26,
"learning_rate": 8.524435012638241e-05,
"loss": 2.7794,
"step": 2490
},
{
"epoch": 0.26,
"learning_rate": 8.518579575695606e-05,
"loss": 2.9536,
"step": 2495
},
{
"epoch": 0.26,
"learning_rate": 8.512714563490694e-05,
"loss": 2.9408,
"step": 2500
},
{
"epoch": 0.26,
"learning_rate": 8.506839991984212e-05,
"loss": 2.9056,
"step": 2505
},
{
"epoch": 0.26,
"learning_rate": 8.500955877162886e-05,
"loss": 2.7341,
"step": 2510
},
{
"epoch": 0.26,
"learning_rate": 8.495062235039411e-05,
"loss": 3.0268,
"step": 2515
},
{
"epoch": 0.26,
"learning_rate": 8.48915908165241e-05,
"loss": 2.9117,
"step": 2520
},
{
"epoch": 0.26,
"learning_rate": 8.483246433066389e-05,
"loss": 2.9346,
"step": 2525
},
{
"epoch": 0.26,
"learning_rate": 8.477324305371692e-05,
"loss": 2.8996,
"step": 2530
},
{
"epoch": 0.26,
"learning_rate": 8.471392714684462e-05,
"loss": 2.8797,
"step": 2535
},
{
"epoch": 0.26,
"learning_rate": 8.465451677146592e-05,
"loss": 2.9372,
"step": 2540
},
{
"epoch": 0.26,
"learning_rate": 8.459501208925681e-05,
"loss": 2.9304,
"step": 2545
},
{
"epoch": 0.27,
"learning_rate": 8.453541326215e-05,
"loss": 2.8766,
"step": 2550
},
{
"epoch": 0.27,
"learning_rate": 8.44757204523343e-05,
"loss": 2.9487,
"step": 2555
},
{
"epoch": 0.27,
"learning_rate": 8.441593382225434e-05,
"loss": 2.8892,
"step": 2560
},
{
"epoch": 0.27,
"learning_rate": 8.435605353461007e-05,
"loss": 2.9837,
"step": 2565
},
{
"epoch": 0.27,
"learning_rate": 8.42960797523563e-05,
"loss": 2.9294,
"step": 2570
},
{
"epoch": 0.27,
"learning_rate": 8.423601263870222e-05,
"loss": 2.9302,
"step": 2575
},
{
"epoch": 0.27,
"learning_rate": 8.417585235711112e-05,
"loss": 2.8034,
"step": 2580
},
{
"epoch": 0.27,
"learning_rate": 8.41155990712997e-05,
"loss": 2.9417,
"step": 2585
},
{
"epoch": 0.27,
"learning_rate": 8.405525294523791e-05,
"loss": 2.9515,
"step": 2590
},
{
"epoch": 0.27,
"learning_rate": 8.39948141431482e-05,
"loss": 2.8899,
"step": 2595
},
{
"epoch": 0.27,
"learning_rate": 8.393428282950529e-05,
"loss": 2.9017,
"step": 2600
},
{
"epoch": 0.27,
"learning_rate": 8.38736591690357e-05,
"loss": 2.9023,
"step": 2605
},
{
"epoch": 0.27,
"learning_rate": 8.381294332671714e-05,
"loss": 2.8277,
"step": 2610
},
{
"epoch": 0.27,
"learning_rate": 8.375213546777829e-05,
"loss": 2.8121,
"step": 2615
},
{
"epoch": 0.27,
"learning_rate": 8.369123575769822e-05,
"loss": 2.8544,
"step": 2620
},
{
"epoch": 0.27,
"learning_rate": 8.36302443622059e-05,
"loss": 2.874,
"step": 2625
},
{
"epoch": 0.27,
"learning_rate": 8.356916144727985e-05,
"loss": 2.7807,
"step": 2630
},
{
"epoch": 0.27,
"learning_rate": 8.350798717914766e-05,
"loss": 2.8339,
"step": 2635
},
{
"epoch": 0.27,
"learning_rate": 8.344672172428547e-05,
"loss": 2.7752,
"step": 2640
},
{
"epoch": 0.27,
"learning_rate": 8.338536524941763e-05,
"loss": 2.8343,
"step": 2645
},
{
"epoch": 0.28,
"learning_rate": 8.332391792151613e-05,
"loss": 2.8354,
"step": 2650
},
{
"epoch": 0.28,
"learning_rate": 8.326237990780028e-05,
"loss": 2.9109,
"step": 2655
},
{
"epoch": 0.28,
"learning_rate": 8.320075137573609e-05,
"loss": 2.7692,
"step": 2660
},
{
"epoch": 0.28,
"learning_rate": 8.313903249303595e-05,
"loss": 2.927,
"step": 2665
},
{
"epoch": 0.28,
"learning_rate": 8.307722342765813e-05,
"loss": 2.9333,
"step": 2670
},
{
"epoch": 0.28,
"learning_rate": 8.30153243478063e-05,
"loss": 2.9822,
"step": 2675
},
{
"epoch": 0.28,
"learning_rate": 8.295333542192907e-05,
"loss": 2.8408,
"step": 2680
},
{
"epoch": 0.28,
"learning_rate": 8.289125681871961e-05,
"loss": 2.9706,
"step": 2685
},
{
"epoch": 0.28,
"learning_rate": 8.282908870711507e-05,
"loss": 2.9692,
"step": 2690
},
{
"epoch": 0.28,
"learning_rate": 8.276683125629625e-05,
"loss": 2.8531,
"step": 2695
},
{
"epoch": 0.28,
"learning_rate": 8.270448463568697e-05,
"loss": 2.8002,
"step": 2700
},
{
"epoch": 0.28,
"learning_rate": 8.264204901495385e-05,
"loss": 2.8538,
"step": 2705
},
{
"epoch": 0.28,
"learning_rate": 8.257952456400559e-05,
"loss": 2.8076,
"step": 2710
},
{
"epoch": 0.28,
"learning_rate": 8.251691145299269e-05,
"loss": 2.9552,
"step": 2715
},
{
"epoch": 0.28,
"learning_rate": 8.245420985230691e-05,
"loss": 2.9521,
"step": 2720
},
{
"epoch": 0.28,
"learning_rate": 8.239141993258082e-05,
"loss": 2.8715,
"step": 2725
},
{
"epoch": 0.28,
"learning_rate": 8.23285418646873e-05,
"loss": 2.8694,
"step": 2730
},
{
"epoch": 0.28,
"learning_rate": 8.226557581973919e-05,
"loss": 2.9244,
"step": 2735
},
{
"epoch": 0.28,
"learning_rate": 8.22025219690887e-05,
"loss": 2.8796,
"step": 2740
},
{
"epoch": 0.29,
"learning_rate": 8.213938048432697e-05,
"loss": 2.8755,
"step": 2745
},
{
"epoch": 0.29,
"learning_rate": 8.207615153728365e-05,
"loss": 2.9238,
"step": 2750
},
{
"epoch": 0.29,
"learning_rate": 8.201283530002641e-05,
"loss": 2.8313,
"step": 2755
},
{
"epoch": 0.29,
"learning_rate": 8.194943194486045e-05,
"loss": 2.8629,
"step": 2760
},
{
"epoch": 0.29,
"learning_rate": 8.188594164432806e-05,
"loss": 2.938,
"step": 2765
},
{
"epoch": 0.29,
"learning_rate": 8.18223645712081e-05,
"loss": 2.8708,
"step": 2770
},
{
"epoch": 0.29,
"learning_rate": 8.175870089851564e-05,
"loss": 2.9425,
"step": 2775
},
{
"epoch": 0.29,
"learning_rate": 8.169495079950138e-05,
"loss": 2.9216,
"step": 2780
},
{
"epoch": 0.29,
"learning_rate": 8.163111444765119e-05,
"loss": 2.8267,
"step": 2785
},
{
"epoch": 0.29,
"learning_rate": 8.156719201668567e-05,
"loss": 2.7973,
"step": 2790
},
{
"epoch": 0.29,
"learning_rate": 8.150318368055973e-05,
"loss": 2.8693,
"step": 2795
},
{
"epoch": 0.29,
"learning_rate": 8.143908961346197e-05,
"loss": 2.8766,
"step": 2800
},
{
"epoch": 0.29,
"learning_rate": 8.137490998981437e-05,
"loss": 2.8617,
"step": 2805
},
{
"epoch": 0.29,
"learning_rate": 8.13106449842717e-05,
"loss": 2.8376,
"step": 2810
},
{
"epoch": 0.29,
"learning_rate": 8.124629477172107e-05,
"loss": 2.8324,
"step": 2815
},
{
"epoch": 0.29,
"learning_rate": 8.118185952728152e-05,
"loss": 2.7969,
"step": 2820
},
{
"epoch": 0.29,
"learning_rate": 8.111733942630345e-05,
"loss": 2.7714,
"step": 2825
},
{
"epoch": 0.29,
"learning_rate": 8.105273464436816e-05,
"loss": 2.8233,
"step": 2830
},
{
"epoch": 0.29,
"learning_rate": 8.098804535728746e-05,
"loss": 2.8751,
"step": 2835
},
{
"epoch": 0.3,
"learning_rate": 8.092327174110309e-05,
"loss": 2.9221,
"step": 2840
},
{
"epoch": 0.3,
"learning_rate": 8.085841397208628e-05,
"loss": 2.8923,
"step": 2845
},
{
"epoch": 0.3,
"learning_rate": 8.079347222673727e-05,
"loss": 2.9066,
"step": 2850
},
{
"epoch": 0.3,
"learning_rate": 8.072844668178485e-05,
"loss": 2.8664,
"step": 2855
},
{
"epoch": 0.3,
"learning_rate": 8.066333751418583e-05,
"loss": 2.8604,
"step": 2860
},
{
"epoch": 0.3,
"learning_rate": 8.059814490112456e-05,
"loss": 2.7464,
"step": 2865
},
{
"epoch": 0.3,
"learning_rate": 8.053286902001256e-05,
"loss": 2.8067,
"step": 2870
},
{
"epoch": 0.3,
"learning_rate": 8.046751004848787e-05,
"loss": 2.9104,
"step": 2875
},
{
"epoch": 0.3,
"learning_rate": 8.040206816441469e-05,
"loss": 2.7966,
"step": 2880
},
{
"epoch": 0.3,
"learning_rate": 8.033654354588282e-05,
"loss": 2.9083,
"step": 2885
},
{
"epoch": 0.3,
"learning_rate": 8.027093637120725e-05,
"loss": 2.8404,
"step": 2890
},
{
"epoch": 0.3,
"learning_rate": 8.020524681892761e-05,
"loss": 2.8589,
"step": 2895
},
{
"epoch": 0.3,
"learning_rate": 8.013947506780768e-05,
"loss": 2.9243,
"step": 2900
},
{
"epoch": 0.3,
"learning_rate": 8.007362129683501e-05,
"loss": 2.7473,
"step": 2905
},
{
"epoch": 0.3,
"learning_rate": 8.000768568522028e-05,
"loss": 2.8833,
"step": 2910
},
{
"epoch": 0.3,
"learning_rate": 7.994166841239692e-05,
"loss": 2.881,
"step": 2915
},
{
"epoch": 0.3,
"learning_rate": 7.987556965802056e-05,
"loss": 2.921,
"step": 2920
},
{
"epoch": 0.3,
"learning_rate": 7.980938960196861e-05,
"loss": 2.8373,
"step": 2925
},
{
"epoch": 0.3,
"learning_rate": 7.974312842433971e-05,
"loss": 2.8599,
"step": 2930
},
{
"epoch": 0.31,
"learning_rate": 7.967678630545326e-05,
"loss": 2.7906,
"step": 2935
},
{
"epoch": 0.31,
"learning_rate": 7.961036342584891e-05,
"loss": 2.9024,
"step": 2940
},
{
"epoch": 0.31,
"learning_rate": 7.954385996628611e-05,
"loss": 2.8931,
"step": 2945
},
{
"epoch": 0.31,
"learning_rate": 7.947727610774361e-05,
"loss": 2.9323,
"step": 2950
},
{
"epoch": 0.31,
"learning_rate": 7.941061203141893e-05,
"loss": 2.8569,
"step": 2955
},
{
"epoch": 0.31,
"learning_rate": 7.934386791872787e-05,
"loss": 2.8745,
"step": 2960
},
{
"epoch": 0.31,
"learning_rate": 7.927704395130408e-05,
"loss": 2.7357,
"step": 2965
},
{
"epoch": 0.31,
"learning_rate": 7.921014031099852e-05,
"loss": 2.8097,
"step": 2970
},
{
"epoch": 0.31,
"learning_rate": 7.914315717987892e-05,
"loss": 2.97,
"step": 2975
},
{
"epoch": 0.31,
"learning_rate": 7.907609474022939e-05,
"loss": 2.8674,
"step": 2980
},
{
"epoch": 0.31,
"learning_rate": 7.900895317454981e-05,
"loss": 2.8661,
"step": 2985
},
{
"epoch": 0.31,
"learning_rate": 7.894173266555545e-05,
"loss": 2.9143,
"step": 2990
},
{
"epoch": 0.31,
"learning_rate": 7.887443339617633e-05,
"loss": 2.8549,
"step": 2995
},
{
"epoch": 0.31,
"learning_rate": 7.880705554955689e-05,
"loss": 2.8276,
"step": 3000
},
{
"epoch": 0.31,
"learning_rate": 7.873959930905536e-05,
"loss": 2.8145,
"step": 3005
},
{
"epoch": 0.31,
"learning_rate": 7.867206485824332e-05,
"loss": 2.8467,
"step": 3010
},
{
"epoch": 0.31,
"learning_rate": 7.860445238090518e-05,
"loss": 2.9239,
"step": 3015
},
{
"epoch": 0.31,
"learning_rate": 7.853676206103768e-05,
"loss": 2.8305,
"step": 3020
},
{
"epoch": 0.31,
"learning_rate": 7.846899408284942e-05,
"loss": 2.825,
"step": 3025
},
{
"epoch": 0.31,
"learning_rate": 7.840114863076031e-05,
"loss": 2.8385,
"step": 3030
},
{
"epoch": 0.32,
"learning_rate": 7.833322588940111e-05,
"loss": 2.8311,
"step": 3035
},
{
"epoch": 0.32,
"learning_rate": 7.826522604361289e-05,
"loss": 2.8175,
"step": 3040
},
{
"epoch": 0.32,
"learning_rate": 7.819714927844658e-05,
"loss": 2.8342,
"step": 3045
},
{
"epoch": 0.32,
"learning_rate": 7.81289957791624e-05,
"loss": 2.7799,
"step": 3050
},
{
"epoch": 0.32,
"learning_rate": 7.806076573122942e-05,
"loss": 2.7581,
"step": 3055
},
{
"epoch": 0.32,
"learning_rate": 7.7992459320325e-05,
"loss": 2.9057,
"step": 3060
},
{
"epoch": 0.32,
"learning_rate": 7.792407673233433e-05,
"loss": 2.8804,
"step": 3065
},
{
"epoch": 0.32,
"learning_rate": 7.78556181533499e-05,
"loss": 2.8632,
"step": 3070
},
{
"epoch": 0.32,
"learning_rate": 7.778708376967096e-05,
"loss": 2.9208,
"step": 3075
},
{
"epoch": 0.32,
"learning_rate": 7.77184737678031e-05,
"loss": 2.8708,
"step": 3080
},
{
"epoch": 0.32,
"learning_rate": 7.76497883344577e-05,
"loss": 2.8846,
"step": 3085
},
{
"epoch": 0.32,
"learning_rate": 7.758102765655137e-05,
"loss": 2.8381,
"step": 3090
},
{
"epoch": 0.32,
"learning_rate": 7.751219192120549e-05,
"loss": 2.8954,
"step": 3095
},
{
"epoch": 0.32,
"learning_rate": 7.744328131574575e-05,
"loss": 2.8707,
"step": 3100
},
{
"epoch": 0.32,
"learning_rate": 7.737429602770152e-05,
"loss": 2.8147,
"step": 3105
},
{
"epoch": 0.32,
"learning_rate": 7.730523624480546e-05,
"loss": 2.7896,
"step": 3110
},
{
"epoch": 0.32,
"learning_rate": 7.723610215499291e-05,
"loss": 2.8004,
"step": 3115
},
{
"epoch": 0.32,
"learning_rate": 7.716689394640148e-05,
"loss": 2.7648,
"step": 3120
},
{
"epoch": 0.32,
"learning_rate": 7.709761180737043e-05,
"loss": 2.8324,
"step": 3125
},
{
"epoch": 0.33,
"learning_rate": 7.702825592644025e-05,
"loss": 2.8576,
"step": 3130
},
{
"epoch": 0.33,
"learning_rate": 7.695882649235206e-05,
"loss": 2.7944,
"step": 3135
},
{
"epoch": 0.33,
"learning_rate": 7.68893236940472e-05,
"loss": 2.904,
"step": 3140
},
{
"epoch": 0.33,
"learning_rate": 7.68197477206666e-05,
"loss": 2.8531,
"step": 3145
},
{
"epoch": 0.33,
"learning_rate": 7.675009876155038e-05,
"loss": 2.9269,
"step": 3150
},
{
"epoch": 0.33,
"learning_rate": 7.668037700623724e-05,
"loss": 2.8462,
"step": 3155
},
{
"epoch": 0.33,
"learning_rate": 7.661058264446403e-05,
"loss": 2.9048,
"step": 3160
},
{
"epoch": 0.33,
"learning_rate": 7.654071586616513e-05,
"loss": 2.7398,
"step": 3165
},
{
"epoch": 0.33,
"learning_rate": 7.647077686147202e-05,
"loss": 2.7498,
"step": 3170
},
{
"epoch": 0.33,
"learning_rate": 7.640076582071271e-05,
"loss": 2.8127,
"step": 3175
},
{
"epoch": 0.33,
"learning_rate": 7.63306829344113e-05,
"loss": 2.779,
"step": 3180
},
{
"epoch": 0.33,
"learning_rate": 7.626052839328734e-05,
"loss": 2.821,
"step": 3185
},
{
"epoch": 0.33,
"learning_rate": 7.619030238825541e-05,
"loss": 2.7154,
"step": 3190
},
{
"epoch": 0.33,
"learning_rate": 7.612000511042457e-05,
"loss": 2.8133,
"step": 3195
},
{
"epoch": 0.33,
"learning_rate": 7.604963675109786e-05,
"loss": 2.9621,
"step": 3200
},
{
"epoch": 0.33,
"learning_rate": 7.597919750177168e-05,
"loss": 2.8276,
"step": 3205
},
{
"epoch": 0.33,
"learning_rate": 7.590868755413543e-05,
"loss": 2.8818,
"step": 3210
},
{
"epoch": 0.33,
"learning_rate": 7.583810710007087e-05,
"loss": 2.892,
"step": 3215
},
{
"epoch": 0.33,
"learning_rate": 7.576745633165159e-05,
"loss": 2.891,
"step": 3220
},
{
"epoch": 0.34,
"learning_rate": 7.569673544114264e-05,
"loss": 2.786,
"step": 3225
},
{
"epoch": 0.34,
"learning_rate": 7.562594462099977e-05,
"loss": 2.8033,
"step": 3230
},
{
"epoch": 0.34,
"learning_rate": 7.555508406386912e-05,
"loss": 2.8829,
"step": 3235
},
{
"epoch": 0.34,
"learning_rate": 7.548415396258657e-05,
"loss": 2.8386,
"step": 3240
},
{
"epoch": 0.34,
"learning_rate": 7.541315451017725e-05,
"loss": 2.9978,
"step": 3245
},
{
"epoch": 0.34,
"learning_rate": 7.534208589985507e-05,
"loss": 2.8506,
"step": 3250
},
{
"epoch": 0.34,
"learning_rate": 7.527094832502206e-05,
"loss": 2.8115,
"step": 3255
},
{
"epoch": 0.34,
"learning_rate": 7.519974197926801e-05,
"loss": 2.8659,
"step": 3260
},
{
"epoch": 0.34,
"learning_rate": 7.512846705636975e-05,
"loss": 2.8384,
"step": 3265
},
{
"epoch": 0.34,
"learning_rate": 7.505712375029085e-05,
"loss": 2.7822,
"step": 3270
},
{
"epoch": 0.34,
"learning_rate": 7.498571225518096e-05,
"loss": 2.9941,
"step": 3275
},
{
"epoch": 0.34,
"learning_rate": 7.491423276537519e-05,
"loss": 2.8534,
"step": 3280
},
{
"epoch": 0.34,
"learning_rate": 7.48426854753938e-05,
"loss": 2.8649,
"step": 3285
},
{
"epoch": 0.34,
"learning_rate": 7.477107057994149e-05,
"loss": 2.7187,
"step": 3290
},
{
"epoch": 0.34,
"learning_rate": 7.469938827390697e-05,
"loss": 2.8818,
"step": 3295
},
{
"epoch": 0.34,
"learning_rate": 7.46276387523624e-05,
"loss": 2.8718,
"step": 3300
},
{
"epoch": 0.34,
"learning_rate": 7.455582221056282e-05,
"loss": 2.8573,
"step": 3305
},
{
"epoch": 0.34,
"learning_rate": 7.448393884394573e-05,
"loss": 2.7629,
"step": 3310
},
{
"epoch": 0.34,
"learning_rate": 7.441198884813039e-05,
"loss": 2.7802,
"step": 3315
},
{
"epoch": 0.35,
"learning_rate": 7.433997241891742e-05,
"loss": 2.88,
"step": 3320
},
{
"epoch": 0.35,
"learning_rate": 7.426788975228828e-05,
"loss": 2.7699,
"step": 3325
},
{
"epoch": 0.35,
"learning_rate": 7.41957410444046e-05,
"loss": 2.8192,
"step": 3330
},
{
"epoch": 0.35,
"learning_rate": 7.412352649160779e-05,
"loss": 2.7483,
"step": 3335
},
{
"epoch": 0.35,
"learning_rate": 7.405124629041838e-05,
"loss": 2.742,
"step": 3340
},
{
"epoch": 0.35,
"learning_rate": 7.397890063753565e-05,
"loss": 2.8049,
"step": 3345
},
{
"epoch": 0.35,
"learning_rate": 7.390648972983693e-05,
"loss": 2.8123,
"step": 3350
},
{
"epoch": 0.35,
"learning_rate": 7.383401376437716e-05,
"loss": 2.8284,
"step": 3355
},
{
"epoch": 0.35,
"learning_rate": 7.376147293838827e-05,
"loss": 2.8751,
"step": 3360
},
{
"epoch": 0.35,
"learning_rate": 7.368886744927875e-05,
"loss": 2.806,
"step": 3365
},
{
"epoch": 0.35,
"learning_rate": 7.361619749463306e-05,
"loss": 2.7375,
"step": 3370
},
{
"epoch": 0.35,
"learning_rate": 7.354346327221106e-05,
"loss": 2.6563,
"step": 3375
},
{
"epoch": 0.35,
"learning_rate": 7.347066497994756e-05,
"loss": 2.8468,
"step": 3380
},
{
"epoch": 0.35,
"learning_rate": 7.339780281595166e-05,
"loss": 2.9108,
"step": 3385
},
{
"epoch": 0.35,
"learning_rate": 7.332487697850633e-05,
"loss": 2.8629,
"step": 3390
},
{
"epoch": 0.35,
"learning_rate": 7.325188766606776e-05,
"loss": 2.8945,
"step": 3395
},
{
"epoch": 0.35,
"learning_rate": 7.317883507726496e-05,
"loss": 2.8415,
"step": 3400
},
{
"epoch": 0.35,
"learning_rate": 7.310571941089906e-05,
"loss": 2.8292,
"step": 3405
},
{
"epoch": 0.35,
"learning_rate": 7.303254086594287e-05,
"loss": 2.9004,
"step": 3410
},
{
"epoch": 0.35,
"learning_rate": 7.295929964154033e-05,
"loss": 2.8548,
"step": 3415
},
{
"epoch": 0.36,
"learning_rate": 7.288599593700592e-05,
"loss": 2.9506,
"step": 3420
},
{
"epoch": 0.36,
"learning_rate": 7.281262995182422e-05,
"loss": 2.7926,
"step": 3425
},
{
"epoch": 0.36,
"learning_rate": 7.273920188564921e-05,
"loss": 2.8264,
"step": 3430
},
{
"epoch": 0.36,
"learning_rate": 7.266571193830387e-05,
"loss": 2.8826,
"step": 3435
},
{
"epoch": 0.36,
"learning_rate": 7.259216030977954e-05,
"loss": 2.7924,
"step": 3440
},
{
"epoch": 0.36,
"learning_rate": 7.251854720023545e-05,
"loss": 2.6379,
"step": 3445
},
{
"epoch": 0.36,
"learning_rate": 7.244487280999813e-05,
"loss": 2.8199,
"step": 3450
},
{
"epoch": 0.36,
"learning_rate": 7.237113733956087e-05,
"loss": 2.7374,
"step": 3455
},
{
"epoch": 0.36,
"learning_rate": 7.22973409895832e-05,
"loss": 2.8179,
"step": 3460
},
{
"epoch": 0.36,
"learning_rate": 7.222348396089029e-05,
"loss": 2.8539,
"step": 3465
},
{
"epoch": 0.36,
"learning_rate": 7.214956645447244e-05,
"loss": 2.8867,
"step": 3470
},
{
"epoch": 0.36,
"learning_rate": 7.207558867148458e-05,
"loss": 2.9582,
"step": 3475
},
{
"epoch": 0.36,
"learning_rate": 7.200155081324565e-05,
"loss": 2.805,
"step": 3480
},
{
"epoch": 0.36,
"learning_rate": 7.192745308123802e-05,
"loss": 2.8093,
"step": 3485
},
{
"epoch": 0.36,
"learning_rate": 7.185329567710706e-05,
"loss": 2.7565,
"step": 3490
},
{
"epoch": 0.36,
"learning_rate": 7.177907880266051e-05,
"loss": 2.8378,
"step": 3495
},
{
"epoch": 0.36,
"learning_rate": 7.170480265986798e-05,
"loss": 2.8456,
"step": 3500
},
{
"epoch": 0.36,
"learning_rate": 7.16304674508603e-05,
"loss": 2.8982,
"step": 3505
},
{
"epoch": 0.36,
"learning_rate": 7.155607337792912e-05,
"loss": 2.9076,
"step": 3510
},
{
"epoch": 0.37,
"learning_rate": 7.148162064352619e-05,
"loss": 2.8324,
"step": 3515
},
{
"epoch": 0.37,
"learning_rate": 7.140710945026296e-05,
"loss": 2.7857,
"step": 3520
},
{
"epoch": 0.37,
"learning_rate": 7.133254000090997e-05,
"loss": 2.7084,
"step": 3525
},
{
"epoch": 0.37,
"learning_rate": 7.125791249839626e-05,
"loss": 2.7922,
"step": 3530
},
{
"epoch": 0.37,
"learning_rate": 7.118322714580888e-05,
"loss": 2.779,
"step": 3535
},
{
"epoch": 0.37,
"learning_rate": 7.110848414639229e-05,
"loss": 2.8117,
"step": 3540
},
{
"epoch": 0.37,
"learning_rate": 7.103368370354783e-05,
"loss": 2.8082,
"step": 3545
},
{
"epoch": 0.37,
"learning_rate": 7.095882602083322e-05,
"loss": 2.8836,
"step": 3550
},
{
"epoch": 0.37,
"learning_rate": 7.088391130196184e-05,
"loss": 2.7234,
"step": 3555
},
{
"epoch": 0.37,
"learning_rate": 7.080893975080237e-05,
"loss": 2.8132,
"step": 3560
},
{
"epoch": 0.37,
"learning_rate": 7.07339115713781e-05,
"loss": 2.7978,
"step": 3565
},
{
"epoch": 0.37,
"learning_rate": 7.065882696786646e-05,
"loss": 2.8014,
"step": 3570
},
{
"epoch": 0.37,
"learning_rate": 7.058368614459845e-05,
"loss": 2.8027,
"step": 3575
},
{
"epoch": 0.37,
"learning_rate": 7.0508489306058e-05,
"loss": 2.8227,
"step": 3580
},
{
"epoch": 0.37,
"learning_rate": 7.043323665688153e-05,
"loss": 2.8312,
"step": 3585
},
{
"epoch": 0.37,
"learning_rate": 7.035792840185728e-05,
"loss": 2.8661,
"step": 3590
},
{
"epoch": 0.37,
"learning_rate": 7.02825647459249e-05,
"loss": 2.8047,
"step": 3595
},
{
"epoch": 0.37,
"learning_rate": 7.020714589417471e-05,
"loss": 2.7137,
"step": 3600
},
{
"epoch": 0.37,
"learning_rate": 7.013167205184732e-05,
"loss": 2.8715,
"step": 3605
},
{
"epoch": 0.38,
"learning_rate": 7.005614342433291e-05,
"loss": 2.8186,
"step": 3610
},
{
"epoch": 0.38,
"learning_rate": 6.998056021717083e-05,
"loss": 2.8177,
"step": 3615
},
{
"epoch": 0.38,
"learning_rate": 6.990492263604889e-05,
"loss": 2.8388,
"step": 3620
},
{
"epoch": 0.38,
"learning_rate": 6.982923088680293e-05,
"loss": 2.8016,
"step": 3625
},
{
"epoch": 0.38,
"learning_rate": 6.975348517541614e-05,
"loss": 2.7375,
"step": 3630
},
{
"epoch": 0.38,
"learning_rate": 6.967768570801862e-05,
"loss": 2.889,
"step": 3635
},
{
"epoch": 0.38,
"learning_rate": 6.960183269088669e-05,
"loss": 2.7283,
"step": 3640
},
{
"epoch": 0.38,
"learning_rate": 6.952592633044247e-05,
"loss": 2.837,
"step": 3645
},
{
"epoch": 0.38,
"learning_rate": 6.944996683325319e-05,
"loss": 2.7536,
"step": 3650
},
{
"epoch": 0.38,
"learning_rate": 6.937395440603073e-05,
"loss": 2.8266,
"step": 3655
},
{
"epoch": 0.38,
"learning_rate": 6.929788925563095e-05,
"loss": 2.8939,
"step": 3660
},
{
"epoch": 0.38,
"learning_rate": 6.922177158905325e-05,
"loss": 2.7968,
"step": 3665
},
{
"epoch": 0.38,
"learning_rate": 6.914560161343991e-05,
"loss": 2.8182,
"step": 3670
},
{
"epoch": 0.38,
"learning_rate": 6.906937953607555e-05,
"loss": 2.8376,
"step": 3675
},
{
"epoch": 0.38,
"learning_rate": 6.899310556438663e-05,
"loss": 2.7836,
"step": 3680
},
{
"epoch": 0.38,
"learning_rate": 6.891677990594075e-05,
"loss": 2.8653,
"step": 3685
},
{
"epoch": 0.38,
"learning_rate": 6.884040276844627e-05,
"loss": 2.7323,
"step": 3690
},
{
"epoch": 0.38,
"learning_rate": 6.876397435975152e-05,
"loss": 2.7654,
"step": 3695
},
{
"epoch": 0.38,
"learning_rate": 6.86874948878445e-05,
"loss": 2.8828,
"step": 3700
},
{
"epoch": 0.39,
"learning_rate": 6.861096456085206e-05,
"loss": 2.7006,
"step": 3705
},
{
"epoch": 0.39,
"learning_rate": 6.853438358703946e-05,
"loss": 2.8379,
"step": 3710
},
{
"epoch": 0.39,
"learning_rate": 6.845775217480981e-05,
"loss": 2.8011,
"step": 3715
},
{
"epoch": 0.39,
"learning_rate": 6.838107053270348e-05,
"loss": 2.7163,
"step": 3720
},
{
"epoch": 0.39,
"learning_rate": 6.830433886939754e-05,
"loss": 2.8269,
"step": 3725
},
{
"epoch": 0.39,
"learning_rate": 6.822755739370518e-05,
"loss": 2.7764,
"step": 3730
},
{
"epoch": 0.39,
"learning_rate": 6.815072631457512e-05,
"loss": 2.8385,
"step": 3735
},
{
"epoch": 0.39,
"learning_rate": 6.80738458410911e-05,
"loss": 2.7325,
"step": 3740
},
{
"epoch": 0.39,
"learning_rate": 6.799691618247124e-05,
"loss": 2.8233,
"step": 3745
},
{
"epoch": 0.39,
"learning_rate": 6.791993754806757e-05,
"loss": 2.8962,
"step": 3750
},
{
"epoch": 0.39,
"learning_rate": 6.784291014736534e-05,
"loss": 2.7099,
"step": 3755
},
{
"epoch": 0.39,
"learning_rate": 6.776583418998255e-05,
"loss": 2.7089,
"step": 3760
},
{
"epoch": 0.39,
"learning_rate": 6.768870988566934e-05,
"loss": 2.794,
"step": 3765
},
{
"epoch": 0.39,
"learning_rate": 6.761153744430739e-05,
"loss": 2.9411,
"step": 3770
},
{
"epoch": 0.39,
"learning_rate": 6.753431707590939e-05,
"loss": 2.7914,
"step": 3775
},
{
"epoch": 0.39,
"learning_rate": 6.745704899061843e-05,
"loss": 2.8252,
"step": 3780
},
{
"epoch": 0.39,
"learning_rate": 6.737973339870753e-05,
"loss": 2.8672,
"step": 3785
},
{
"epoch": 0.39,
"learning_rate": 6.730237051057892e-05,
"loss": 2.7813,
"step": 3790
},
{
"epoch": 0.39,
"learning_rate": 6.722496053676356e-05,
"loss": 2.7094,
"step": 3795
},
{
"epoch": 0.39,
"learning_rate": 6.714750368792055e-05,
"loss": 2.8326,
"step": 3800
},
{
"epoch": 0.4,
"learning_rate": 6.707000017483655e-05,
"loss": 2.79,
"step": 3805
},
{
"epoch": 0.4,
"learning_rate": 6.699245020842524e-05,
"loss": 2.884,
"step": 3810
},
{
"epoch": 0.4,
"learning_rate": 6.691485399972665e-05,
"loss": 2.8821,
"step": 3815
},
{
"epoch": 0.4,
"learning_rate": 6.68372117599067e-05,
"loss": 2.8124,
"step": 3820
},
{
"epoch": 0.4,
"learning_rate": 6.675952370025656e-05,
"loss": 2.77,
"step": 3825
},
{
"epoch": 0.4,
"learning_rate": 6.66817900321921e-05,
"loss": 2.8628,
"step": 3830
},
{
"epoch": 0.4,
"learning_rate": 6.660401096725331e-05,
"loss": 2.8688,
"step": 3835
},
{
"epoch": 0.4,
"learning_rate": 6.652618671710369e-05,
"loss": 2.7957,
"step": 3840
},
{
"epoch": 0.4,
"learning_rate": 6.644831749352974e-05,
"loss": 2.8844,
"step": 3845
},
{
"epoch": 0.4,
"learning_rate": 6.637040350844035e-05,
"loss": 2.8165,
"step": 3850
},
{
"epoch": 0.4,
"learning_rate": 6.629244497386619e-05,
"loss": 2.7355,
"step": 3855
},
{
"epoch": 0.4,
"learning_rate": 6.62144421019592e-05,
"loss": 2.7875,
"step": 3860
},
{
"epoch": 0.4,
"learning_rate": 6.613639510499193e-05,
"loss": 2.6861,
"step": 3865
},
{
"epoch": 0.4,
"learning_rate": 6.605830419535702e-05,
"loss": 2.8663,
"step": 3870
},
{
"epoch": 0.4,
"learning_rate": 6.598016958556667e-05,
"loss": 2.7667,
"step": 3875
},
{
"epoch": 0.4,
"learning_rate": 6.590199148825197e-05,
"loss": 2.8832,
"step": 3880
},
{
"epoch": 0.4,
"learning_rate": 6.582377011616231e-05,
"loss": 2.8396,
"step": 3885
},
{
"epoch": 0.4,
"learning_rate": 6.574550568216491e-05,
"loss": 2.8842,
"step": 3890
},
{
"epoch": 0.4,
"learning_rate": 6.566719839924412e-05,
"loss": 2.8188,
"step": 3895
},
{
"epoch": 0.41,
"learning_rate": 6.558884848050094e-05,
"loss": 2.8007,
"step": 3900
},
{
"epoch": 0.41,
"learning_rate": 6.55104561391524e-05,
"loss": 2.7435,
"step": 3905
},
{
"epoch": 0.41,
"learning_rate": 6.543202158853091e-05,
"loss": 2.7344,
"step": 3910
},
{
"epoch": 0.41,
"learning_rate": 6.535354504208385e-05,
"loss": 2.7721,
"step": 3915
},
{
"epoch": 0.41,
"learning_rate": 6.527502671337278e-05,
"loss": 2.8337,
"step": 3920
},
{
"epoch": 0.41,
"learning_rate": 6.519646681607305e-05,
"loss": 2.7271,
"step": 3925
},
{
"epoch": 0.41,
"learning_rate": 6.511786556397308e-05,
"loss": 2.8055,
"step": 3930
},
{
"epoch": 0.41,
"learning_rate": 6.503922317097385e-05,
"loss": 2.7727,
"step": 3935
},
{
"epoch": 0.41,
"learning_rate": 6.496053985108829e-05,
"loss": 2.8323,
"step": 3940
},
{
"epoch": 0.41,
"learning_rate": 6.48818158184407e-05,
"loss": 2.7894,
"step": 3945
},
{
"epoch": 0.41,
"learning_rate": 6.480305128726618e-05,
"loss": 2.7845,
"step": 3950
},
{
"epoch": 0.41,
"learning_rate": 6.472424647191007e-05,
"loss": 2.9459,
"step": 3955
},
{
"epoch": 0.41,
"learning_rate": 6.464540158682731e-05,
"loss": 2.8363,
"step": 3960
},
{
"epoch": 0.41,
"learning_rate": 6.456651684658188e-05,
"loss": 2.7561,
"step": 3965
},
{
"epoch": 0.41,
"learning_rate": 6.44875924658462e-05,
"loss": 2.7729,
"step": 3970
},
{
"epoch": 0.41,
"learning_rate": 6.44086286594006e-05,
"loss": 2.7976,
"step": 3975
},
{
"epoch": 0.41,
"learning_rate": 6.432962564213272e-05,
"loss": 2.8236,
"step": 3980
},
{
"epoch": 0.41,
"learning_rate": 6.425058362903684e-05,
"loss": 2.8025,
"step": 3985
},
{
"epoch": 0.41,
"learning_rate": 6.417150283521341e-05,
"loss": 2.7781,
"step": 3990
},
{
"epoch": 0.42,
"learning_rate": 6.40923834758684e-05,
"loss": 2.8142,
"step": 3995
},
{
"epoch": 0.42,
"learning_rate": 6.401322576631276e-05,
"loss": 2.8105,
"step": 4000
},
{
"epoch": 0.42,
"learning_rate": 6.393402992196176e-05,
"loss": 2.7935,
"step": 4005
},
{
"epoch": 0.42,
"learning_rate": 6.385479615833445e-05,
"loss": 2.7715,
"step": 4010
},
{
"epoch": 0.42,
"learning_rate": 6.37755246910531e-05,
"loss": 2.7551,
"step": 4015
},
{
"epoch": 0.42,
"learning_rate": 6.369621573584256e-05,
"loss": 2.7665,
"step": 4020
},
{
"epoch": 0.42,
"learning_rate": 6.361686950852971e-05,
"loss": 2.7747,
"step": 4025
},
{
"epoch": 0.42,
"learning_rate": 6.353748622504288e-05,
"loss": 2.7969,
"step": 4030
},
{
"epoch": 0.42,
"learning_rate": 6.345806610141121e-05,
"loss": 2.7534,
"step": 4035
},
{
"epoch": 0.42,
"learning_rate": 6.33786093537641e-05,
"loss": 2.7515,
"step": 4040
},
{
"epoch": 0.42,
"learning_rate": 6.329911619833063e-05,
"loss": 2.8612,
"step": 4045
},
{
"epoch": 0.42,
"learning_rate": 6.321958685143894e-05,
"loss": 2.8475,
"step": 4050
},
{
"epoch": 0.42,
"learning_rate": 6.314002152951569e-05,
"loss": 2.9223,
"step": 4055
},
{
"epoch": 0.42,
"learning_rate": 6.30604204490854e-05,
"loss": 2.8174,
"step": 4060
},
{
"epoch": 0.42,
"learning_rate": 6.298078382676993e-05,
"loss": 2.7896,
"step": 4065
},
{
"epoch": 0.42,
"learning_rate": 6.290111187928784e-05,
"loss": 2.8135,
"step": 4070
},
{
"epoch": 0.42,
"learning_rate": 6.282140482345387e-05,
"loss": 2.8162,
"step": 4075
},
{
"epoch": 0.42,
"learning_rate": 6.274166287617824e-05,
"loss": 2.7534,
"step": 4080
},
{
"epoch": 0.42,
"learning_rate": 6.266188625446615e-05,
"loss": 2.7764,
"step": 4085
},
{
"epoch": 0.43,
"learning_rate": 6.258207517541717e-05,
"loss": 2.7333,
"step": 4090
},
{
"epoch": 0.43,
"learning_rate": 6.250222985622461e-05,
"loss": 2.804,
"step": 4095
},
{
"epoch": 0.43,
"learning_rate": 6.2422350514175e-05,
"loss": 2.775,
"step": 4100
},
{
"epoch": 0.43,
"learning_rate": 6.234243736664742e-05,
"loss": 2.8172,
"step": 4105
},
{
"epoch": 0.43,
"learning_rate": 6.226249063111299e-05,
"loss": 2.866,
"step": 4110
},
{
"epoch": 0.43,
"learning_rate": 6.218251052513418e-05,
"loss": 2.7761,
"step": 4115
},
{
"epoch": 0.43,
"learning_rate": 6.210249726636431e-05,
"loss": 2.8461,
"step": 4120
},
{
"epoch": 0.43,
"learning_rate": 6.202245107254693e-05,
"loss": 2.7478,
"step": 4125
},
{
"epoch": 0.43,
"learning_rate": 6.194237216151516e-05,
"loss": 2.7578,
"step": 4130
},
{
"epoch": 0.43,
"learning_rate": 6.186226075119123e-05,
"loss": 2.8637,
"step": 4135
},
{
"epoch": 0.43,
"learning_rate": 6.178211705958572e-05,
"loss": 2.7526,
"step": 4140
},
{
"epoch": 0.43,
"learning_rate": 6.170194130479717e-05,
"loss": 2.7478,
"step": 4145
},
{
"epoch": 0.43,
"learning_rate": 6.16217337050113e-05,
"loss": 2.7694,
"step": 4150
},
{
"epoch": 0.43,
"learning_rate": 6.15414944785005e-05,
"loss": 2.7896,
"step": 4155
},
{
"epoch": 0.43,
"learning_rate": 6.146122384362326e-05,
"loss": 2.8394,
"step": 4160
},
{
"epoch": 0.43,
"learning_rate": 6.138092201882349e-05,
"loss": 2.7603,
"step": 4165
},
{
"epoch": 0.43,
"learning_rate": 6.130058922263e-05,
"loss": 2.7314,
"step": 4170
},
{
"epoch": 0.43,
"learning_rate": 6.122022567365592e-05,
"loss": 2.7428,
"step": 4175
},
{
"epoch": 0.43,
"learning_rate": 6.113983159059803e-05,
"loss": 2.8512,
"step": 4180
},
{
"epoch": 0.43,
"learning_rate": 6.105940719223621e-05,
"loss": 2.6747,
"step": 4185
},
{
"epoch": 0.44,
"learning_rate": 6.097895269743287e-05,
"loss": 2.7728,
"step": 4190
},
{
"epoch": 0.44,
"learning_rate": 6.089846832513224e-05,
"loss": 2.8832,
"step": 4195
},
{
"epoch": 0.44,
"learning_rate": 6.0817954294359955e-05,
"loss": 2.7268,
"step": 4200
},
{
"epoch": 0.44,
"learning_rate": 6.0737410824222316e-05,
"loss": 2.7661,
"step": 4205
},
{
"epoch": 0.44,
"learning_rate": 6.0656838133905727e-05,
"loss": 2.8184,
"step": 4210
},
{
"epoch": 0.44,
"learning_rate": 6.0576236442676104e-05,
"loss": 2.7203,
"step": 4215
},
{
"epoch": 0.44,
"learning_rate": 6.049560596987833e-05,
"loss": 2.7775,
"step": 4220
},
{
"epoch": 0.44,
"learning_rate": 6.04149469349356e-05,
"loss": 2.6867,
"step": 4225
},
{
"epoch": 0.44,
"learning_rate": 6.03342595573488e-05,
"loss": 2.8301,
"step": 4230
},
{
"epoch": 0.44,
"learning_rate": 6.0253544056695976e-05,
"loss": 2.7151,
"step": 4235
},
{
"epoch": 0.44,
"learning_rate": 6.01728006526317e-05,
"loss": 2.7374,
"step": 4240
},
{
"epoch": 0.44,
"learning_rate": 6.009202956488647e-05,
"loss": 2.8116,
"step": 4245
},
{
"epoch": 0.44,
"learning_rate": 6.001123101326613e-05,
"loss": 2.7625,
"step": 4250
},
{
"epoch": 0.44,
"learning_rate": 5.9930405217651274e-05,
"loss": 2.7298,
"step": 4255
},
{
"epoch": 0.44,
"learning_rate": 5.984955239799661e-05,
"loss": 2.778,
"step": 4260
},
{
"epoch": 0.44,
"learning_rate": 5.976867277433043e-05,
"loss": 2.8337,
"step": 4265
},
{
"epoch": 0.44,
"learning_rate": 5.968776656675389e-05,
"loss": 2.8382,
"step": 4270
},
{
"epoch": 0.44,
"learning_rate": 5.9606833995440584e-05,
"loss": 2.7471,
"step": 4275
},
{
"epoch": 0.44,
"learning_rate": 5.952587528063579e-05,
"loss": 2.768,
"step": 4280
},
{
"epoch": 0.45,
"learning_rate": 5.944489064265595e-05,
"loss": 2.7573,
"step": 4285
},
{
"epoch": 0.45,
"learning_rate": 5.936388030188804e-05,
"loss": 2.783,
"step": 4290
},
{
"epoch": 0.45,
"learning_rate": 5.9282844478789014e-05,
"loss": 2.8236,
"step": 4295
},
{
"epoch": 0.45,
"learning_rate": 5.920178339388513e-05,
"loss": 2.7366,
"step": 4300
},
{
"epoch": 0.45,
"learning_rate": 5.912069726777144e-05,
"loss": 2.8208,
"step": 4305
},
{
"epoch": 0.45,
"learning_rate": 5.9039586321111096e-05,
"loss": 2.711,
"step": 4310
},
{
"epoch": 0.45,
"learning_rate": 5.895845077463482e-05,
"loss": 2.7601,
"step": 4315
},
{
"epoch": 0.45,
"learning_rate": 5.887729084914024e-05,
"loss": 2.6759,
"step": 4320
},
{
"epoch": 0.45,
"learning_rate": 5.8796106765491424e-05,
"loss": 2.7948,
"step": 4325
},
{
"epoch": 0.45,
"learning_rate": 5.8714898744618055e-05,
"loss": 2.7223,
"step": 4330
},
{
"epoch": 0.45,
"learning_rate": 5.8633667007515056e-05,
"loss": 2.7322,
"step": 4335
},
{
"epoch": 0.45,
"learning_rate": 5.8552411775241845e-05,
"loss": 2.7299,
"step": 4340
},
{
"epoch": 0.45,
"learning_rate": 5.8471133268921794e-05,
"loss": 2.804,
"step": 4345
},
{
"epoch": 0.45,
"learning_rate": 5.8389831709741605e-05,
"loss": 2.6573,
"step": 4350
},
{
"epoch": 0.45,
"learning_rate": 5.830850731895071e-05,
"loss": 2.8836,
"step": 4355
},
{
"epoch": 0.45,
"learning_rate": 5.822716031786069e-05,
"loss": 2.7939,
"step": 4360
},
{
"epoch": 0.45,
"learning_rate": 5.8145790927844614e-05,
"loss": 2.8612,
"step": 4365
},
{
"epoch": 0.45,
"learning_rate": 5.806439937033654e-05,
"loss": 2.7475,
"step": 4370
},
{
"epoch": 0.45,
"learning_rate": 5.798298586683084e-05,
"loss": 2.7155,
"step": 4375
},
{
"epoch": 0.46,
"learning_rate": 5.790155063888155e-05,
"loss": 2.7545,
"step": 4380
},
{
"epoch": 0.46,
"learning_rate": 5.78200939081019e-05,
"loss": 2.822,
"step": 4385
},
{
"epoch": 0.46,
"learning_rate": 5.7738615896163586e-05,
"loss": 2.8636,
"step": 4390
},
{
"epoch": 0.46,
"learning_rate": 5.765711682479621e-05,
"loss": 2.635,
"step": 4395
},
{
"epoch": 0.46,
"learning_rate": 5.7575596915786754e-05,
"loss": 2.7465,
"step": 4400
},
{
"epoch": 0.46,
"learning_rate": 5.749405639097882e-05,
"loss": 2.7909,
"step": 4405
},
{
"epoch": 0.46,
"learning_rate": 5.74124954722722e-05,
"loss": 2.7634,
"step": 4410
},
{
"epoch": 0.46,
"learning_rate": 5.7330914381622104e-05,
"loss": 2.7577,
"step": 4415
},
{
"epoch": 0.46,
"learning_rate": 5.724931334103867e-05,
"loss": 2.8498,
"step": 4420
},
{
"epoch": 0.46,
"learning_rate": 5.716769257258634e-05,
"loss": 2.7725,
"step": 4425
},
{
"epoch": 0.46,
"learning_rate": 5.708605229838323e-05,
"loss": 2.7638,
"step": 4430
},
{
"epoch": 0.46,
"learning_rate": 5.700439274060053e-05,
"loss": 2.8365,
"step": 4435
},
{
"epoch": 0.46,
"learning_rate": 5.692271412146192e-05,
"loss": 2.8213,
"step": 4440
},
{
"epoch": 0.46,
"learning_rate": 5.6841016663242916e-05,
"loss": 2.8199,
"step": 4445
},
{
"epoch": 0.46,
"learning_rate": 5.675930058827039e-05,
"loss": 2.84,
"step": 4450
},
{
"epoch": 0.46,
"learning_rate": 5.66775661189218e-05,
"loss": 2.6791,
"step": 4455
},
{
"epoch": 0.46,
"learning_rate": 5.659581347762466e-05,
"loss": 2.7793,
"step": 4460
},
{
"epoch": 0.46,
"learning_rate": 5.651404288685598e-05,
"loss": 2.8542,
"step": 4465
},
{
"epoch": 0.46,
"learning_rate": 5.643225456914156e-05,
"loss": 2.8141,
"step": 4470
},
{
"epoch": 0.47,
"learning_rate": 5.635044874705551e-05,
"loss": 2.6986,
"step": 4475
},
{
"epoch": 0.47,
"learning_rate": 5.626862564321952e-05,
"loss": 2.6377,
"step": 4480
},
{
"epoch": 0.47,
"learning_rate": 5.618678548030235e-05,
"loss": 2.7496,
"step": 4485
},
{
"epoch": 0.47,
"learning_rate": 5.610492848101915e-05,
"loss": 2.7882,
"step": 4490
},
{
"epoch": 0.47,
"learning_rate": 5.602305486813089e-05,
"loss": 2.8307,
"step": 4495
},
{
"epoch": 0.47,
"learning_rate": 5.5941164864443785e-05,
"loss": 2.8126,
"step": 4500
},
{
"epoch": 0.47,
"learning_rate": 5.585925869280861e-05,
"loss": 2.6938,
"step": 4505
},
{
"epoch": 0.47,
"learning_rate": 5.577733657612019e-05,
"loss": 2.8165,
"step": 4510
},
{
"epoch": 0.47,
"learning_rate": 5.5695398737316685e-05,
"loss": 2.7802,
"step": 4515
},
{
"epoch": 0.47,
"learning_rate": 5.561344539937907e-05,
"loss": 2.7824,
"step": 4520
},
{
"epoch": 0.47,
"learning_rate": 5.55314767853305e-05,
"loss": 2.682,
"step": 4525
},
{
"epoch": 0.47,
"learning_rate": 5.5449493118235715e-05,
"loss": 2.7022,
"step": 4530
},
{
"epoch": 0.47,
"learning_rate": 5.536749462120039e-05,
"loss": 2.795,
"step": 4535
},
{
"epoch": 0.47,
"learning_rate": 5.528548151737056e-05,
"loss": 2.7823,
"step": 4540
},
{
"epoch": 0.47,
"learning_rate": 5.520345402993203e-05,
"loss": 2.6922,
"step": 4545
},
{
"epoch": 0.47,
"learning_rate": 5.512141238210974e-05,
"loss": 2.7793,
"step": 4550
},
{
"epoch": 0.47,
"learning_rate": 5.5039356797167155e-05,
"loss": 2.7519,
"step": 4555
},
{
"epoch": 0.47,
"learning_rate": 5.4957287498405686e-05,
"loss": 2.6961,
"step": 4560
},
{
"epoch": 0.47,
"learning_rate": 5.487520470916404e-05,
"loss": 2.6963,
"step": 4565
},
{
"epoch": 0.47,
"learning_rate": 5.479310865281766e-05,
"loss": 2.8037,
"step": 4570
},
{
"epoch": 0.48,
"learning_rate": 5.471099955277809e-05,
"loss": 2.7891,
"step": 4575
},
{
"epoch": 0.48,
"learning_rate": 5.462887763249235e-05,
"loss": 2.7974,
"step": 4580
},
{
"epoch": 0.48,
"learning_rate": 5.454674311544235e-05,
"loss": 2.746,
"step": 4585
},
{
"epoch": 0.48,
"learning_rate": 5.446459622514433e-05,
"loss": 2.9017,
"step": 4590
},
{
"epoch": 0.48,
"learning_rate": 5.4382437185148125e-05,
"loss": 2.7158,
"step": 4595
},
{
"epoch": 0.48,
"learning_rate": 5.430026621903669e-05,
"loss": 2.6632,
"step": 4600
},
{
"epoch": 0.48,
"learning_rate": 5.4218083550425416e-05,
"loss": 2.6599,
"step": 4605
},
{
"epoch": 0.48,
"learning_rate": 5.4135889402961547e-05,
"loss": 2.7192,
"step": 4610
},
{
"epoch": 0.48,
"learning_rate": 5.405368400032357e-05,
"loss": 2.7212,
"step": 4615
},
{
"epoch": 0.48,
"learning_rate": 5.3971467566220546e-05,
"loss": 2.604,
"step": 4620
},
{
"epoch": 0.48,
"learning_rate": 5.3889240324391645e-05,
"loss": 2.8103,
"step": 4625
},
{
"epoch": 0.48,
"learning_rate": 5.380700249860538e-05,
"loss": 2.6619,
"step": 4630
},
{
"epoch": 0.48,
"learning_rate": 5.372475431265912e-05,
"loss": 2.73,
"step": 4635
},
{
"epoch": 0.48,
"learning_rate": 5.3642495990378386e-05,
"loss": 2.791,
"step": 4640
},
{
"epoch": 0.48,
"learning_rate": 5.356022775561627e-05,
"loss": 2.7453,
"step": 4645
},
{
"epoch": 0.48,
"learning_rate": 5.347794983225289e-05,
"loss": 2.6631,
"step": 4650
},
{
"epoch": 0.48,
"learning_rate": 5.339566244419471e-05,
"loss": 2.803,
"step": 4655
},
{
"epoch": 0.48,
"learning_rate": 5.331336581537395e-05,
"loss": 2.8289,
"step": 4660
},
{
"epoch": 0.48,
"learning_rate": 5.323106016974795e-05,
"loss": 2.852,
"step": 4665
},
{
"epoch": 0.49,
"learning_rate": 5.314874573129862e-05,
"loss": 2.8183,
"step": 4670
},
{
"epoch": 0.49,
"learning_rate": 5.3066422724031794e-05,
"loss": 2.8214,
"step": 4675
},
{
"epoch": 0.49,
"learning_rate": 5.298409137197663e-05,
"loss": 2.6957,
"step": 4680
},
{
"epoch": 0.49,
"learning_rate": 5.2901751899184984e-05,
"loss": 2.7421,
"step": 4685
},
{
"epoch": 0.49,
"learning_rate": 5.2819404529730796e-05,
"loss": 2.7114,
"step": 4690
},
{
"epoch": 0.49,
"learning_rate": 5.273704948770951e-05,
"loss": 2.7576,
"step": 4695
},
{
"epoch": 0.49,
"learning_rate": 5.265468699723748e-05,
"loss": 2.777,
"step": 4700
},
{
"epoch": 0.49,
"learning_rate": 5.2572317282451286e-05,
"loss": 2.7504,
"step": 4705
},
{
"epoch": 0.49,
"learning_rate": 5.2489940567507165e-05,
"loss": 2.8379,
"step": 4710
},
{
"epoch": 0.49,
"learning_rate": 5.240755707658046e-05,
"loss": 2.7288,
"step": 4715
},
{
"epoch": 0.49,
"learning_rate": 5.232516703386489e-05,
"loss": 2.7368,
"step": 4720
},
{
"epoch": 0.49,
"learning_rate": 5.2242770663572027e-05,
"loss": 2.6741,
"step": 4725
},
{
"epoch": 0.49,
"learning_rate": 5.2160368189930694e-05,
"loss": 2.6761,
"step": 4730
},
{
"epoch": 0.49,
"learning_rate": 5.2077959837186263e-05,
"loss": 2.7722,
"step": 4735
},
{
"epoch": 0.49,
"learning_rate": 5.1995545829600166e-05,
"loss": 2.7227,
"step": 4740
},
{
"epoch": 0.49,
"learning_rate": 5.191312639144915e-05,
"loss": 2.7499,
"step": 4745
},
{
"epoch": 0.49,
"learning_rate": 5.1830701747024825e-05,
"loss": 2.7366,
"step": 4750
},
{
"epoch": 0.49,
"learning_rate": 5.1748272120632925e-05,
"loss": 2.7151,
"step": 4755
},
{
"epoch": 0.49,
"learning_rate": 5.1665837736592736e-05,
"loss": 2.777,
"step": 4760
},
{
"epoch": 0.5,
"learning_rate": 5.15833988192365e-05,
"loss": 2.7701,
"step": 4765
},
{
"epoch": 0.5,
"learning_rate": 5.15009555929088e-05,
"loss": 2.6703,
"step": 4770
},
{
"epoch": 0.5,
"learning_rate": 5.141850828196595e-05,
"loss": 2.6332,
"step": 4775
},
{
"epoch": 0.5,
"learning_rate": 5.133605711077536e-05,
"loss": 2.7286,
"step": 4780
},
{
"epoch": 0.5,
"learning_rate": 5.1253602303714945e-05,
"loss": 2.737,
"step": 4785
},
{
"epoch": 0.5,
"learning_rate": 5.1171144085172553e-05,
"loss": 2.8568,
"step": 4790
},
{
"epoch": 0.5,
"learning_rate": 5.108868267954526e-05,
"loss": 2.7379,
"step": 4795
},
{
"epoch": 0.5,
"learning_rate": 5.1006218311238884e-05,
"loss": 2.7201,
"step": 4800
},
{
"epoch": 0.5,
"learning_rate": 5.092375120466724e-05,
"loss": 2.8514,
"step": 4805
},
{
"epoch": 0.5,
"learning_rate": 5.084128158425161e-05,
"loss": 2.7207,
"step": 4810
},
{
"epoch": 0.5,
"learning_rate": 5.075880967442014e-05,
"loss": 2.7052,
"step": 4815
},
{
"epoch": 0.5,
"learning_rate": 5.067633569960718e-05,
"loss": 2.7205,
"step": 4820
},
{
"epoch": 0.5,
"learning_rate": 5.059385988425274e-05,
"loss": 2.7462,
"step": 4825
},
{
"epoch": 0.5,
"learning_rate": 5.0511382452801794e-05,
"loss": 2.8259,
"step": 4830
},
{
"epoch": 0.5,
"learning_rate": 5.0428903629703737e-05,
"loss": 2.8064,
"step": 4835
},
{
"epoch": 0.5,
"learning_rate": 5.034642363941174e-05,
"loss": 2.7844,
"step": 4840
},
{
"epoch": 0.5,
"learning_rate": 5.026394270638215e-05,
"loss": 2.7799,
"step": 4845
},
{
"epoch": 0.5,
"learning_rate": 5.0181461055073907e-05,
"loss": 2.7156,
"step": 4850
},
{
"epoch": 0.5,
"learning_rate": 5.009897890994788e-05,
"loss": 2.7288,
"step": 4855
},
{
"epoch": 0.51,
"learning_rate": 5.001649649546626e-05,
"loss": 2.788,
"step": 4860
},
{
"epoch": 0.51,
"learning_rate": 4.993401403609203e-05,
"loss": 2.699,
"step": 4865
},
{
"epoch": 0.51,
"learning_rate": 4.9851531756288234e-05,
"loss": 2.7868,
"step": 4870
},
{
"epoch": 0.51,
"learning_rate": 4.976904988051748e-05,
"loss": 2.6787,
"step": 4875
},
{
"epoch": 0.51,
"learning_rate": 4.968656863324124e-05,
"loss": 2.7173,
"step": 4880
},
{
"epoch": 0.51,
"learning_rate": 4.96040882389193e-05,
"loss": 2.6892,
"step": 4885
},
{
"epoch": 0.51,
"learning_rate": 4.952160892200909e-05,
"loss": 2.7195,
"step": 4890
},
{
"epoch": 0.51,
"learning_rate": 4.9439130906965144e-05,
"loss": 2.7833,
"step": 4895
},
{
"epoch": 0.51,
"learning_rate": 4.9356654418238444e-05,
"loss": 2.6298,
"step": 4900
},
{
"epoch": 0.51,
"learning_rate": 4.927417968027581e-05,
"loss": 2.8132,
"step": 4905
},
{
"epoch": 0.51,
"learning_rate": 4.91917069175193e-05,
"loss": 2.7593,
"step": 4910
},
{
"epoch": 0.51,
"learning_rate": 4.910923635440558e-05,
"loss": 2.749,
"step": 4915
},
{
"epoch": 0.51,
"learning_rate": 4.9026768215365386e-05,
"loss": 2.7355,
"step": 4920
},
{
"epoch": 0.51,
"learning_rate": 4.8944302724822793e-05,
"loss": 2.7952,
"step": 4925
},
{
"epoch": 0.51,
"learning_rate": 4.886184010719471e-05,
"loss": 2.6316,
"step": 4930
},
{
"epoch": 0.51,
"learning_rate": 4.877938058689022e-05,
"loss": 2.8273,
"step": 4935
},
{
"epoch": 0.51,
"learning_rate": 4.8696924388309946e-05,
"loss": 2.7793,
"step": 4940
},
{
"epoch": 0.51,
"learning_rate": 4.86144717358455e-05,
"loss": 2.7784,
"step": 4945
},
{
"epoch": 0.51,
"learning_rate": 4.853202285387886e-05,
"loss": 2.8057,
"step": 4950
},
{
"epoch": 0.51,
"learning_rate": 4.84495779667817e-05,
"loss": 2.796,
"step": 4955
},
{
"epoch": 0.52,
"learning_rate": 4.836713729891487e-05,
"loss": 2.7398,
"step": 4960
},
{
"epoch": 0.52,
"learning_rate": 4.8284701074627694e-05,
"loss": 2.7258,
"step": 4965
},
{
"epoch": 0.52,
"learning_rate": 4.820226951825741e-05,
"loss": 2.7267,
"step": 4970
},
{
"epoch": 0.52,
"learning_rate": 4.81198428541286e-05,
"loss": 2.7369,
"step": 4975
},
{
"epoch": 0.52,
"learning_rate": 4.8037421306552485e-05,
"loss": 2.7647,
"step": 4980
},
{
"epoch": 0.52,
"learning_rate": 4.795500509982636e-05,
"loss": 2.7825,
"step": 4985
},
{
"epoch": 0.52,
"learning_rate": 4.7872594458233e-05,
"loss": 2.778,
"step": 4990
},
{
"epoch": 0.52,
"learning_rate": 4.779018960604007e-05,
"loss": 2.6539,
"step": 4995
},
{
"epoch": 0.52,
"learning_rate": 4.770779076749941e-05,
"loss": 2.7854,
"step": 5000
},
{
"epoch": 0.52,
"learning_rate": 4.7625398166846565e-05,
"loss": 2.7741,
"step": 5005
},
{
"epoch": 0.52,
"learning_rate": 4.754301202830006e-05,
"loss": 2.6679,
"step": 5010
},
{
"epoch": 0.52,
"learning_rate": 4.746063257606082e-05,
"loss": 2.6797,
"step": 5015
},
{
"epoch": 0.52,
"learning_rate": 4.737826003431162e-05,
"loss": 2.7992,
"step": 5020
},
{
"epoch": 0.52,
"learning_rate": 4.729589462721642e-05,
"loss": 2.7308,
"step": 5025
},
{
"epoch": 0.52,
"learning_rate": 4.7213536578919734e-05,
"loss": 2.8089,
"step": 5030
},
{
"epoch": 0.52,
"learning_rate": 4.7131186113546076e-05,
"loss": 2.7417,
"step": 5035
},
{
"epoch": 0.52,
"learning_rate": 4.704884345519932e-05,
"loss": 2.8385,
"step": 5040
},
{
"epoch": 0.52,
"learning_rate": 4.6966508827962075e-05,
"loss": 2.6673,
"step": 5045
},
{
"epoch": 0.52,
"learning_rate": 4.688418245589514e-05,
"loss": 2.8795,
"step": 5050
},
{
"epoch": 0.53,
"learning_rate": 4.6801864563036805e-05,
"loss": 2.6769,
"step": 5055
},
{
"epoch": 0.53,
"learning_rate": 4.6719555373402294e-05,
"loss": 2.6339,
"step": 5060
},
{
"epoch": 0.53,
"learning_rate": 4.6637255110983154e-05,
"loss": 2.737,
"step": 5065
},
{
"epoch": 0.53,
"learning_rate": 4.655496399974664e-05,
"loss": 2.7355,
"step": 5070
},
{
"epoch": 0.53,
"learning_rate": 4.6472682263635106e-05,
"loss": 2.6927,
"step": 5075
},
{
"epoch": 0.53,
"learning_rate": 4.63904101265654e-05,
"loss": 2.7484,
"step": 5080
},
{
"epoch": 0.53,
"learning_rate": 4.630814781242821e-05,
"loss": 2.7346,
"step": 5085
},
{
"epoch": 0.53,
"learning_rate": 4.6225895545087535e-05,
"loss": 2.741,
"step": 5090
},
{
"epoch": 0.53,
"learning_rate": 4.614365354838e-05,
"loss": 2.6967,
"step": 5095
},
{
"epoch": 0.53,
"learning_rate": 4.60614220461143e-05,
"loss": 2.7827,
"step": 5100
},
{
"epoch": 0.53,
"learning_rate": 4.597920126207058e-05,
"loss": 2.7814,
"step": 5105
},
{
"epoch": 0.53,
"learning_rate": 4.5896991419999796e-05,
"loss": 2.7238,
"step": 5110
},
{
"epoch": 0.53,
"learning_rate": 4.581479274362314e-05,
"loss": 2.7729,
"step": 5115
},
{
"epoch": 0.53,
"learning_rate": 4.5732605456631386e-05,
"loss": 2.7935,
"step": 5120
},
{
"epoch": 0.53,
"learning_rate": 4.565042978268438e-05,
"loss": 2.779,
"step": 5125
},
{
"epoch": 0.53,
"learning_rate": 4.55682659454103e-05,
"loss": 2.8434,
"step": 5130
},
{
"epoch": 0.53,
"learning_rate": 4.5486114168405156e-05,
"loss": 2.6332,
"step": 5135
},
{
"epoch": 0.53,
"learning_rate": 4.5403974675232104e-05,
"loss": 2.7884,
"step": 5140
},
{
"epoch": 0.53,
"learning_rate": 4.532184768942089e-05,
"loss": 2.7641,
"step": 5145
},
{
"epoch": 0.54,
"learning_rate": 4.523973343446723e-05,
"loss": 2.6598,
"step": 5150
},
{
"epoch": 0.54,
"learning_rate": 4.5157632133832185e-05,
"loss": 2.8221,
"step": 5155
},
{
"epoch": 0.54,
"learning_rate": 4.5075544010941564e-05,
"loss": 2.7466,
"step": 5160
},
{
"epoch": 0.54,
"learning_rate": 4.499346928918531e-05,
"loss": 2.657,
"step": 5165
},
{
"epoch": 0.54,
"learning_rate": 4.491140819191691e-05,
"loss": 2.73,
"step": 5170
},
{
"epoch": 0.54,
"learning_rate": 4.482936094245274e-05,
"loss": 2.7519,
"step": 5175
},
{
"epoch": 0.54,
"learning_rate": 4.4747327764071543e-05,
"loss": 2.7373,
"step": 5180
},
{
"epoch": 0.54,
"learning_rate": 4.466530888001374e-05,
"loss": 2.826,
"step": 5185
},
{
"epoch": 0.54,
"learning_rate": 4.458330451348085e-05,
"loss": 2.7996,
"step": 5190
},
{
"epoch": 0.54,
"learning_rate": 4.450131488763488e-05,
"loss": 2.7324,
"step": 5195
},
{
"epoch": 0.54,
"learning_rate": 4.441934022559776e-05,
"loss": 2.6651,
"step": 5200
},
{
"epoch": 0.54,
"learning_rate": 4.433738075045065e-05,
"loss": 2.7593,
"step": 5205
},
{
"epoch": 0.54,
"learning_rate": 4.425543668523341e-05,
"loss": 2.8338,
"step": 5210
},
{
"epoch": 0.54,
"learning_rate": 4.417350825294395e-05,
"loss": 2.7163,
"step": 5215
},
{
"epoch": 0.54,
"learning_rate": 4.409159567653765e-05,
"loss": 2.7272,
"step": 5220
},
{
"epoch": 0.54,
"learning_rate": 4.400969917892674e-05,
"loss": 2.6687,
"step": 5225
},
{
"epoch": 0.54,
"learning_rate": 4.392781898297969e-05,
"loss": 2.6519,
"step": 5230
},
{
"epoch": 0.54,
"learning_rate": 4.384595531152061e-05,
"loss": 2.8416,
"step": 5235
},
{
"epoch": 0.54,
"learning_rate": 4.376410838732863e-05,
"loss": 2.7061,
"step": 5240
},
{
"epoch": 0.55,
"learning_rate": 4.368227843313731e-05,
"loss": 2.6549,
"step": 5245
},
{
"epoch": 0.55,
"learning_rate": 4.360046567163403e-05,
"loss": 2.7547,
"step": 5250
},
{
"epoch": 0.55,
"learning_rate": 4.351867032545939e-05,
"loss": 2.749,
"step": 5255
},
{
"epoch": 0.55,
"learning_rate": 4.3436892617206605e-05,
"loss": 2.7606,
"step": 5260
},
{
"epoch": 0.55,
"learning_rate": 4.335513276942086e-05,
"loss": 2.6941,
"step": 5265
},
{
"epoch": 0.55,
"learning_rate": 4.3273391004598745e-05,
"loss": 2.7235,
"step": 5270
},
{
"epoch": 0.55,
"learning_rate": 4.319166754518768e-05,
"loss": 2.7548,
"step": 5275
},
{
"epoch": 0.55,
"learning_rate": 4.3109962613585214e-05,
"loss": 2.7403,
"step": 5280
},
{
"epoch": 0.55,
"learning_rate": 4.302827643213851e-05,
"loss": 2.7132,
"step": 5285
},
{
"epoch": 0.55,
"learning_rate": 4.294660922314367e-05,
"loss": 2.7132,
"step": 5290
},
{
"epoch": 0.55,
"learning_rate": 4.286496120884522e-05,
"loss": 2.7145,
"step": 5295
},
{
"epoch": 0.55,
"learning_rate": 4.278333261143541e-05,
"loss": 2.7391,
"step": 5300
},
{
"epoch": 0.55,
"learning_rate": 4.270172365305367e-05,
"loss": 2.8301,
"step": 5305
},
{
"epoch": 0.55,
"learning_rate": 4.2620134555785976e-05,
"loss": 2.7432,
"step": 5310
},
{
"epoch": 0.55,
"learning_rate": 4.2538565541664245e-05,
"loss": 2.646,
"step": 5315
},
{
"epoch": 0.55,
"learning_rate": 4.245701683266575e-05,
"loss": 2.7398,
"step": 5320
},
{
"epoch": 0.55,
"learning_rate": 4.237548865071251e-05,
"loss": 2.6704,
"step": 5325
},
{
"epoch": 0.55,
"learning_rate": 4.229398121767069e-05,
"loss": 2.8324,
"step": 5330
},
{
"epoch": 0.55,
"learning_rate": 4.221249475534997e-05,
"loss": 2.7215,
"step": 5335
},
{
"epoch": 0.55,
"learning_rate": 4.2131029485502995e-05,
"loss": 2.6765,
"step": 5340
},
{
"epoch": 0.56,
"learning_rate": 4.204958562982468e-05,
"loss": 2.6898,
"step": 5345
},
{
"epoch": 0.56,
"learning_rate": 4.196816340995172e-05,
"loss": 2.6884,
"step": 5350
},
{
"epoch": 0.56,
"learning_rate": 4.1886763047461925e-05,
"loss": 2.7053,
"step": 5355
},
{
"epoch": 0.56,
"learning_rate": 4.180538476387359e-05,
"loss": 2.7,
"step": 5360
},
{
"epoch": 0.56,
"learning_rate": 4.172402878064495e-05,
"loss": 2.6314,
"step": 5365
},
{
"epoch": 0.56,
"learning_rate": 4.164269531917354e-05,
"loss": 2.7212,
"step": 5370
},
{
"epoch": 0.56,
"learning_rate": 4.156138460079563e-05,
"loss": 2.6327,
"step": 5375
},
{
"epoch": 0.56,
"learning_rate": 4.14800968467856e-05,
"loss": 2.709,
"step": 5380
},
{
"epoch": 0.56,
"learning_rate": 4.1398832278355296e-05,
"loss": 2.7675,
"step": 5385
},
{
"epoch": 0.56,
"learning_rate": 4.131759111665349e-05,
"loss": 2.7527,
"step": 5390
},
{
"epoch": 0.56,
"learning_rate": 4.123637358276526e-05,
"loss": 2.6614,
"step": 5395
},
{
"epoch": 0.56,
"learning_rate": 4.1155179897711374e-05,
"loss": 2.7871,
"step": 5400
},
{
"epoch": 0.56,
"learning_rate": 4.107401028244773e-05,
"loss": 2.6748,
"step": 5405
},
{
"epoch": 0.56,
"learning_rate": 4.0992864957864694e-05,
"loss": 2.7458,
"step": 5410
},
{
"epoch": 0.56,
"learning_rate": 4.0911744144786523e-05,
"loss": 2.8008,
"step": 5415
},
{
"epoch": 0.56,
"learning_rate": 4.083064806397078e-05,
"loss": 2.702,
"step": 5420
},
{
"epoch": 0.56,
"learning_rate": 4.074957693610775e-05,
"loss": 2.7229,
"step": 5425
},
{
"epoch": 0.56,
"learning_rate": 4.0668530981819776e-05,
"loss": 2.7552,
"step": 5430
},
{
"epoch": 0.56,
"learning_rate": 4.0587510421660704e-05,
"loss": 2.7721,
"step": 5435
},
{
"epoch": 0.57,
"learning_rate": 4.050651547611526e-05,
"loss": 2.8156,
"step": 5440
},
{
"epoch": 0.57,
"learning_rate": 4.04255463655985e-05,
"loss": 2.6884,
"step": 5445
},
{
"epoch": 0.57,
"learning_rate": 4.0344603310455154e-05,
"loss": 2.637,
"step": 5450
},
{
"epoch": 0.57,
"learning_rate": 4.026368653095906e-05,
"loss": 2.6856,
"step": 5455
},
{
"epoch": 0.57,
"learning_rate": 4.0182796247312526e-05,
"loss": 2.7604,
"step": 5460
},
{
"epoch": 0.57,
"learning_rate": 4.010193267964576e-05,
"loss": 2.765,
"step": 5465
},
{
"epoch": 0.57,
"learning_rate": 4.0021096048016297e-05,
"loss": 2.7565,
"step": 5470
},
{
"epoch": 0.57,
"learning_rate": 3.99402865724083e-05,
"loss": 2.7977,
"step": 5475
},
{
"epoch": 0.57,
"learning_rate": 3.985950447273214e-05,
"loss": 2.736,
"step": 5480
},
{
"epoch": 0.57,
"learning_rate": 3.9778749968823586e-05,
"loss": 2.6924,
"step": 5485
},
{
"epoch": 0.57,
"learning_rate": 3.969802328044337e-05,
"loss": 2.7419,
"step": 5490
},
{
"epoch": 0.57,
"learning_rate": 3.9617324627276487e-05,
"loss": 2.6471,
"step": 5495
},
{
"epoch": 0.57,
"learning_rate": 3.9536654228931685e-05,
"loss": 2.6561,
"step": 5500
},
{
"epoch": 0.57,
"learning_rate": 3.945601230494079e-05,
"loss": 2.6891,
"step": 5505
},
{
"epoch": 0.57,
"learning_rate": 3.937539907475814e-05,
"loss": 2.7776,
"step": 5510
},
{
"epoch": 0.57,
"learning_rate": 3.9294814757759985e-05,
"loss": 2.7243,
"step": 5515
},
{
"epoch": 0.57,
"learning_rate": 3.921425957324392e-05,
"loss": 2.7389,
"step": 5520
},
{
"epoch": 0.57,
"learning_rate": 3.913373374042822e-05,
"loss": 2.6752,
"step": 5525
},
{
"epoch": 0.57,
"learning_rate": 3.905323747845133e-05,
"loss": 2.778,
"step": 5530
},
{
"epoch": 0.58,
"learning_rate": 3.8972771006371193e-05,
"loss": 2.6726,
"step": 5535
},
{
"epoch": 0.58,
"learning_rate": 3.8892334543164675e-05,
"loss": 2.6949,
"step": 5540
},
{
"epoch": 0.58,
"learning_rate": 3.8811928307727005e-05,
"loss": 2.7808,
"step": 5545
},
{
"epoch": 0.58,
"learning_rate": 3.8731552518871115e-05,
"loss": 2.7885,
"step": 5550
},
{
"epoch": 0.58,
"learning_rate": 3.8651207395327135e-05,
"loss": 2.6868,
"step": 5555
},
{
"epoch": 0.58,
"learning_rate": 3.8570893155741686e-05,
"loss": 2.7966,
"step": 5560
},
{
"epoch": 0.58,
"learning_rate": 3.849061001867739e-05,
"loss": 2.8397,
"step": 5565
},
{
"epoch": 0.58,
"learning_rate": 3.8410358202612184e-05,
"loss": 2.724,
"step": 5570
},
{
"epoch": 0.58,
"learning_rate": 3.8330137925938825e-05,
"loss": 2.7404,
"step": 5575
},
{
"epoch": 0.58,
"learning_rate": 3.82499494069642e-05,
"loss": 2.7237,
"step": 5580
},
{
"epoch": 0.58,
"learning_rate": 3.816979286390877e-05,
"loss": 2.697,
"step": 5585
},
{
"epoch": 0.58,
"learning_rate": 3.808966851490599e-05,
"loss": 2.7746,
"step": 5590
},
{
"epoch": 0.58,
"learning_rate": 3.800957657800173e-05,
"loss": 2.8274,
"step": 5595
},
{
"epoch": 0.58,
"learning_rate": 3.792951727115359e-05,
"loss": 2.6803,
"step": 5600
},
{
"epoch": 0.58,
"learning_rate": 3.784949081223046e-05,
"loss": 2.6819,
"step": 5605
},
{
"epoch": 0.58,
"learning_rate": 3.7769497419011766e-05,
"loss": 2.8205,
"step": 5610
},
{
"epoch": 0.58,
"learning_rate": 3.768953730918699e-05,
"loss": 2.7139,
"step": 5615
},
{
"epoch": 0.58,
"learning_rate": 3.760961070035501e-05,
"loss": 2.6233,
"step": 5620
},
{
"epoch": 0.58,
"learning_rate": 3.752971781002355e-05,
"loss": 2.7341,
"step": 5625
},
{
"epoch": 0.59,
"learning_rate": 3.74498588556086e-05,
"loss": 2.7425,
"step": 5630
},
{
"epoch": 0.59,
"learning_rate": 3.737003405443376e-05,
"loss": 2.7893,
"step": 5635
},
{
"epoch": 0.59,
"learning_rate": 3.72902436237297e-05,
"loss": 2.6761,
"step": 5640
},
{
"epoch": 0.59,
"learning_rate": 3.7210487780633564e-05,
"loss": 2.6289,
"step": 5645
},
{
"epoch": 0.59,
"learning_rate": 3.713076674218836e-05,
"loss": 2.7075,
"step": 5650
},
{
"epoch": 0.59,
"learning_rate": 3.70510807253424e-05,
"loss": 2.6829,
"step": 5655
},
{
"epoch": 0.59,
"learning_rate": 3.6971429946948665e-05,
"loss": 2.7653,
"step": 5660
},
{
"epoch": 0.59,
"learning_rate": 3.6891814623764254e-05,
"loss": 2.7096,
"step": 5665
},
{
"epoch": 0.59,
"learning_rate": 3.6812234972449774e-05,
"loss": 2.8272,
"step": 5670
},
{
"epoch": 0.59,
"learning_rate": 3.673269120956875e-05,
"loss": 2.7336,
"step": 5675
},
{
"epoch": 0.59,
"learning_rate": 3.6653183551587086e-05,
"loss": 2.8061,
"step": 5680
},
{
"epoch": 0.59,
"learning_rate": 3.657371221487239e-05,
"loss": 2.6946,
"step": 5685
},
{
"epoch": 0.59,
"learning_rate": 3.6494277415693424e-05,
"loss": 2.5687,
"step": 5690
},
{
"epoch": 0.59,
"learning_rate": 3.6414879370219526e-05,
"loss": 2.6681,
"step": 5695
},
{
"epoch": 0.59,
"learning_rate": 3.633551829452003e-05,
"loss": 2.622,
"step": 5700
},
{
"epoch": 0.59,
"learning_rate": 3.625619440456366e-05,
"loss": 2.6905,
"step": 5705
},
{
"epoch": 0.59,
"learning_rate": 3.617690791621793e-05,
"loss": 2.5904,
"step": 5710
},
{
"epoch": 0.59,
"learning_rate": 3.609765904524857e-05,
"loss": 2.7405,
"step": 5715
},
{
"epoch": 0.59,
"learning_rate": 3.601844800731895e-05,
"loss": 2.7857,
"step": 5720
},
{
"epoch": 0.59,
"learning_rate": 3.59392750179895e-05,
"loss": 2.7771,
"step": 5725
},
{
"epoch": 0.6,
"learning_rate": 3.586014029271708e-05,
"loss": 2.8068,
"step": 5730
},
{
"epoch": 0.6,
"learning_rate": 3.5781044046854416e-05,
"loss": 2.7863,
"step": 5735
},
{
"epoch": 0.6,
"learning_rate": 3.5701986495649555e-05,
"loss": 2.8068,
"step": 5740
},
{
"epoch": 0.6,
"learning_rate": 3.562296785424518e-05,
"loss": 2.7266,
"step": 5745
},
{
"epoch": 0.6,
"learning_rate": 3.554398833767816e-05,
"loss": 2.6987,
"step": 5750
},
{
"epoch": 0.6,
"learning_rate": 3.546504816087886e-05,
"loss": 2.7184,
"step": 5755
},
{
"epoch": 0.6,
"learning_rate": 3.538614753867058e-05,
"loss": 2.8262,
"step": 5760
},
{
"epoch": 0.6,
"learning_rate": 3.530728668576898e-05,
"loss": 2.6614,
"step": 5765
},
{
"epoch": 0.6,
"learning_rate": 3.52284658167815e-05,
"loss": 2.6184,
"step": 5770
},
{
"epoch": 0.6,
"learning_rate": 3.5149685146206756e-05,
"loss": 2.7209,
"step": 5775
},
{
"epoch": 0.6,
"learning_rate": 3.507094488843401e-05,
"loss": 2.7134,
"step": 5780
},
{
"epoch": 0.6,
"learning_rate": 3.499224525774251e-05,
"loss": 2.7139,
"step": 5785
},
{
"epoch": 0.6,
"learning_rate": 3.4913586468300934e-05,
"loss": 2.7206,
"step": 5790
},
{
"epoch": 0.6,
"learning_rate": 3.483496873416685e-05,
"loss": 2.6727,
"step": 5795
},
{
"epoch": 0.6,
"learning_rate": 3.4756392269286095e-05,
"loss": 2.7336,
"step": 5800
},
{
"epoch": 0.6,
"learning_rate": 3.467785728749219e-05,
"loss": 2.673,
"step": 5805
},
{
"epoch": 0.6,
"learning_rate": 3.4599364002505755e-05,
"loss": 2.7281,
"step": 5810
},
{
"epoch": 0.6,
"learning_rate": 3.452091262793395e-05,
"loss": 2.6406,
"step": 5815
},
{
"epoch": 0.6,
"learning_rate": 3.444250337726989e-05,
"loss": 2.6412,
"step": 5820
},
{
"epoch": 0.61,
"learning_rate": 3.4364136463892024e-05,
"loss": 2.6269,
"step": 5825
},
{
"epoch": 0.61,
"learning_rate": 3.428581210106366e-05,
"loss": 2.646,
"step": 5830
},
{
"epoch": 0.61,
"learning_rate": 3.420753050193223e-05,
"loss": 2.6813,
"step": 5835
},
{
"epoch": 0.61,
"learning_rate": 3.412929187952884e-05,
"loss": 2.7472,
"step": 5840
},
{
"epoch": 0.61,
"learning_rate": 3.4051096446767615e-05,
"loss": 2.6361,
"step": 5845
},
{
"epoch": 0.61,
"learning_rate": 3.397294441644515e-05,
"loss": 2.6996,
"step": 5850
},
{
"epoch": 0.61,
"learning_rate": 3.3894836001239946e-05,
"loss": 2.7101,
"step": 5855
},
{
"epoch": 0.61,
"learning_rate": 3.3816771413711815e-05,
"loss": 2.7277,
"step": 5860
},
{
"epoch": 0.61,
"learning_rate": 3.373875086630125e-05,
"loss": 2.5964,
"step": 5865
},
{
"epoch": 0.61,
"learning_rate": 3.3660774571328944e-05,
"loss": 2.6525,
"step": 5870
},
{
"epoch": 0.61,
"learning_rate": 3.358284274099516e-05,
"loss": 2.7624,
"step": 5875
},
{
"epoch": 0.61,
"learning_rate": 3.3504955587379147e-05,
"loss": 2.7794,
"step": 5880
},
{
"epoch": 0.61,
"learning_rate": 3.342711332243856e-05,
"loss": 2.788,
"step": 5885
},
{
"epoch": 0.61,
"learning_rate": 3.334931615800892e-05,
"loss": 2.7746,
"step": 5890
},
{
"epoch": 0.61,
"learning_rate": 3.3271564305802994e-05,
"loss": 2.8029,
"step": 5895
},
{
"epoch": 0.61,
"learning_rate": 3.319385797741025e-05,
"loss": 2.6299,
"step": 5900
},
{
"epoch": 0.61,
"learning_rate": 3.311619738429628e-05,
"loss": 2.6995,
"step": 5905
},
{
"epoch": 0.61,
"learning_rate": 3.303858273780221e-05,
"loss": 2.6918,
"step": 5910
},
{
"epoch": 0.61,
"learning_rate": 3.296101424914411e-05,
"loss": 2.6757,
"step": 5915
},
{
"epoch": 0.62,
"learning_rate": 3.288349212941246e-05,
"loss": 2.658,
"step": 5920
},
{
"epoch": 0.62,
"learning_rate": 3.280601658957153e-05,
"loss": 2.7098,
"step": 5925
},
{
"epoch": 0.62,
"learning_rate": 3.272858784045887e-05,
"loss": 2.7388,
"step": 5930
},
{
"epoch": 0.62,
"learning_rate": 3.265120609278466e-05,
"loss": 2.6805,
"step": 5935
},
{
"epoch": 0.62,
"learning_rate": 3.257387155713119e-05,
"loss": 2.7034,
"step": 5940
},
{
"epoch": 0.62,
"learning_rate": 3.249658444395224e-05,
"loss": 2.7802,
"step": 5945
},
{
"epoch": 0.62,
"learning_rate": 3.24193449635726e-05,
"loss": 2.7178,
"step": 5950
},
{
"epoch": 0.62,
"learning_rate": 3.234215332618739e-05,
"loss": 2.6878,
"step": 5955
},
{
"epoch": 0.62,
"learning_rate": 3.2265009741861516e-05,
"loss": 2.6513,
"step": 5960
},
{
"epoch": 0.62,
"learning_rate": 3.2187914420529174e-05,
"loss": 2.712,
"step": 5965
},
{
"epoch": 0.62,
"learning_rate": 3.211086757199315e-05,
"loss": 2.6428,
"step": 5970
},
{
"epoch": 0.62,
"learning_rate": 3.2033869405924386e-05,
"loss": 2.7434,
"step": 5975
},
{
"epoch": 0.62,
"learning_rate": 3.195692013186127e-05,
"loss": 2.6887,
"step": 5980
},
{
"epoch": 0.62,
"learning_rate": 3.1880019959209225e-05,
"loss": 2.6404,
"step": 5985
},
{
"epoch": 0.62,
"learning_rate": 3.180316909724e-05,
"loss": 2.723,
"step": 5990
},
{
"epoch": 0.62,
"learning_rate": 3.172636775509114e-05,
"loss": 2.7408,
"step": 5995
},
{
"epoch": 0.62,
"learning_rate": 3.164961614176543e-05,
"loss": 2.6724,
"step": 6000
},
{
"epoch": 0.62,
"learning_rate": 3.1572914466130385e-05,
"loss": 2.6848,
"step": 6005
},
{
"epoch": 0.62,
"learning_rate": 3.1496262936917564e-05,
"loss": 2.7317,
"step": 6010
},
{
"epoch": 0.63,
"learning_rate": 3.141966176272208e-05,
"loss": 2.6826,
"step": 6015
},
{
"epoch": 0.63,
"learning_rate": 3.134311115200199e-05,
"loss": 2.6319,
"step": 6020
},
{
"epoch": 0.63,
"learning_rate": 3.126661131307781e-05,
"loss": 2.7345,
"step": 6025
},
{
"epoch": 0.63,
"learning_rate": 3.119016245413182e-05,
"loss": 2.7528,
"step": 6030
},
{
"epoch": 0.63,
"learning_rate": 3.11137647832076e-05,
"loss": 2.6538,
"step": 6035
},
{
"epoch": 0.63,
"learning_rate": 3.103741850820944e-05,
"loss": 2.7152,
"step": 6040
},
{
"epoch": 0.63,
"learning_rate": 3.0961123836901715e-05,
"loss": 2.6777,
"step": 6045
},
{
"epoch": 0.63,
"learning_rate": 3.088488097690844e-05,
"loss": 2.6781,
"step": 6050
},
{
"epoch": 0.63,
"learning_rate": 3.080869013571257e-05,
"loss": 2.7492,
"step": 6055
},
{
"epoch": 0.63,
"learning_rate": 3.073255152065555e-05,
"loss": 2.7646,
"step": 6060
},
{
"epoch": 0.63,
"learning_rate": 3.065646533893667e-05,
"loss": 2.7003,
"step": 6065
},
{
"epoch": 0.63,
"learning_rate": 3.058043179761252e-05,
"loss": 2.6538,
"step": 6070
},
{
"epoch": 0.63,
"learning_rate": 3.050445110359646e-05,
"loss": 2.6864,
"step": 6075
},
{
"epoch": 0.63,
"learning_rate": 3.0428523463658044e-05,
"loss": 2.6762,
"step": 6080
},
{
"epoch": 0.63,
"learning_rate": 3.0352649084422414e-05,
"loss": 2.7392,
"step": 6085
},
{
"epoch": 0.63,
"learning_rate": 3.0276828172369796e-05,
"loss": 2.7265,
"step": 6090
},
{
"epoch": 0.63,
"learning_rate": 3.020106093383489e-05,
"loss": 2.7306,
"step": 6095
},
{
"epoch": 0.63,
"learning_rate": 3.012534757500638e-05,
"loss": 2.7252,
"step": 6100
},
{
"epoch": 0.63,
"learning_rate": 3.004968830192627e-05,
"loss": 2.8327,
"step": 6105
},
{
"epoch": 0.63,
"learning_rate": 2.9974083320489415e-05,
"loss": 2.7725,
"step": 6110
},
{
"epoch": 0.64,
"learning_rate": 2.98985328364429e-05,
"loss": 2.6487,
"step": 6115
},
{
"epoch": 0.64,
"learning_rate": 2.9823037055385506e-05,
"loss": 2.6798,
"step": 6120
},
{
"epoch": 0.64,
"learning_rate": 2.9747596182767178e-05,
"loss": 2.7353,
"step": 6125
},
{
"epoch": 0.64,
"learning_rate": 2.967221042388838e-05,
"loss": 2.7228,
"step": 6130
},
{
"epoch": 0.64,
"learning_rate": 2.9596879983899672e-05,
"loss": 2.7376,
"step": 6135
},
{
"epoch": 0.64,
"learning_rate": 2.9521605067801016e-05,
"loss": 2.7762,
"step": 6140
},
{
"epoch": 0.64,
"learning_rate": 2.9446385880441274e-05,
"loss": 2.8082,
"step": 6145
},
{
"epoch": 0.64,
"learning_rate": 2.9371222626517648e-05,
"loss": 2.6798,
"step": 6150
},
{
"epoch": 0.64,
"learning_rate": 2.9296115510575177e-05,
"loss": 2.7208,
"step": 6155
},
{
"epoch": 0.64,
"learning_rate": 2.9221064737006077e-05,
"loss": 2.8207,
"step": 6160
},
{
"epoch": 0.64,
"learning_rate": 2.914607051004924e-05,
"loss": 2.5948,
"step": 6165
},
{
"epoch": 0.64,
"learning_rate": 2.907113303378972e-05,
"loss": 2.624,
"step": 6170
},
{
"epoch": 0.64,
"learning_rate": 2.8996252512158062e-05,
"loss": 2.681,
"step": 6175
},
{
"epoch": 0.64,
"learning_rate": 2.8921429148929903e-05,
"loss": 2.7284,
"step": 6180
},
{
"epoch": 0.64,
"learning_rate": 2.8846663147725238e-05,
"loss": 2.7645,
"step": 6185
},
{
"epoch": 0.64,
"learning_rate": 2.8771954712008038e-05,
"loss": 2.5963,
"step": 6190
},
{
"epoch": 0.64,
"learning_rate": 2.86973040450856e-05,
"loss": 2.7704,
"step": 6195
},
{
"epoch": 0.64,
"learning_rate": 2.8622711350107966e-05,
"loss": 2.7948,
"step": 6200
},
{
"epoch": 0.64,
"learning_rate": 2.8548176830067474e-05,
"loss": 2.7581,
"step": 6205
},
{
"epoch": 0.65,
"learning_rate": 2.8473700687798134e-05,
"loss": 2.6956,
"step": 6210
},
{
"epoch": 0.65,
"learning_rate": 2.839928312597506e-05,
"loss": 2.6804,
"step": 6215
},
{
"epoch": 0.65,
"learning_rate": 2.8324924347114013e-05,
"loss": 2.7167,
"step": 6220
},
{
"epoch": 0.65,
"learning_rate": 2.8250624553570694e-05,
"loss": 2.6808,
"step": 6225
},
{
"epoch": 0.65,
"learning_rate": 2.8176383947540376e-05,
"loss": 2.6298,
"step": 6230
},
{
"epoch": 0.65,
"learning_rate": 2.810220273105718e-05,
"loss": 2.636,
"step": 6235
},
{
"epoch": 0.65,
"learning_rate": 2.802808110599367e-05,
"loss": 2.7253,
"step": 6240
},
{
"epoch": 0.65,
"learning_rate": 2.795401927406024e-05,
"loss": 2.7612,
"step": 6245
},
{
"epoch": 0.65,
"learning_rate": 2.7880017436804497e-05,
"loss": 2.7411,
"step": 6250
},
{
"epoch": 0.65,
"learning_rate": 2.780607579561088e-05,
"loss": 2.7017,
"step": 6255
},
{
"epoch": 0.65,
"learning_rate": 2.7732194551699923e-05,
"loss": 2.8703,
"step": 6260
},
{
"epoch": 0.65,
"learning_rate": 2.7658373906127867e-05,
"loss": 2.5608,
"step": 6265
},
{
"epoch": 0.65,
"learning_rate": 2.7584614059786016e-05,
"loss": 2.8136,
"step": 6270
},
{
"epoch": 0.65,
"learning_rate": 2.7510915213400202e-05,
"loss": 2.8,
"step": 6275
},
{
"epoch": 0.65,
"learning_rate": 2.743727756753028e-05,
"loss": 2.6678,
"step": 6280
},
{
"epoch": 0.65,
"learning_rate": 2.7363701322569557e-05,
"loss": 2.8176,
"step": 6285
},
{
"epoch": 0.65,
"learning_rate": 2.7290186678744235e-05,
"loss": 2.6132,
"step": 6290
},
{
"epoch": 0.65,
"learning_rate": 2.72167338361129e-05,
"loss": 2.7669,
"step": 6295
},
{
"epoch": 0.65,
"learning_rate": 2.7143342994565913e-05,
"loss": 2.7726,
"step": 6300
},
{
"epoch": 0.66,
"learning_rate": 2.7070014353824956e-05,
"loss": 2.6988,
"step": 6305
},
{
"epoch": 0.66,
"learning_rate": 2.6996748113442394e-05,
"loss": 2.7458,
"step": 6310
},
{
"epoch": 0.66,
"learning_rate": 2.6923544472800826e-05,
"loss": 2.6396,
"step": 6315
},
{
"epoch": 0.66,
"learning_rate": 2.6850403631112487e-05,
"loss": 2.7667,
"step": 6320
},
{
"epoch": 0.66,
"learning_rate": 2.6777325787418674e-05,
"loss": 2.5955,
"step": 6325
},
{
"epoch": 0.66,
"learning_rate": 2.6704311140589293e-05,
"loss": 2.6748,
"step": 6330
},
{
"epoch": 0.66,
"learning_rate": 2.6631359889322228e-05,
"loss": 2.657,
"step": 6335
},
{
"epoch": 0.66,
"learning_rate": 2.6558472232142877e-05,
"loss": 2.7549,
"step": 6340
},
{
"epoch": 0.66,
"learning_rate": 2.6485648367403587e-05,
"loss": 2.633,
"step": 6345
},
{
"epoch": 0.66,
"learning_rate": 2.641288849328304e-05,
"loss": 2.6923,
"step": 6350
},
{
"epoch": 0.66,
"learning_rate": 2.634019280778583e-05,
"loss": 2.7234,
"step": 6355
},
{
"epoch": 0.66,
"learning_rate": 2.6267561508741868e-05,
"loss": 2.7171,
"step": 6360
},
{
"epoch": 0.66,
"learning_rate": 2.6194994793805837e-05,
"loss": 2.7072,
"step": 6365
},
{
"epoch": 0.66,
"learning_rate": 2.612249286045669e-05,
"loss": 2.6615,
"step": 6370
},
{
"epoch": 0.66,
"learning_rate": 2.6050055905997018e-05,
"loss": 2.7311,
"step": 6375
},
{
"epoch": 0.66,
"learning_rate": 2.5977684127552665e-05,
"loss": 2.7683,
"step": 6380
},
{
"epoch": 0.66,
"learning_rate": 2.5905377722072045e-05,
"loss": 2.8014,
"step": 6385
},
{
"epoch": 0.66,
"learning_rate": 2.5833136886325705e-05,
"loss": 2.7037,
"step": 6390
},
{
"epoch": 0.66,
"learning_rate": 2.576096181690576e-05,
"loss": 2.6792,
"step": 6395
},
{
"epoch": 0.67,
"learning_rate": 2.5688852710225315e-05,
"loss": 2.6602,
"step": 6400
},
{
"epoch": 0.67,
"learning_rate": 2.561680976251802e-05,
"loss": 2.6933,
"step": 6405
},
{
"epoch": 0.67,
"learning_rate": 2.554483316983741e-05,
"loss": 2.6807,
"step": 6410
},
{
"epoch": 0.67,
"learning_rate": 2.5472923128056526e-05,
"loss": 2.6869,
"step": 6415
},
{
"epoch": 0.67,
"learning_rate": 2.5401079832867265e-05,
"loss": 2.6797,
"step": 6420
},
{
"epoch": 0.67,
"learning_rate": 2.5329303479779854e-05,
"loss": 2.7077,
"step": 6425
},
{
"epoch": 0.67,
"learning_rate": 2.5257594264122397e-05,
"loss": 2.743,
"step": 6430
},
{
"epoch": 0.67,
"learning_rate": 2.5185952381040266e-05,
"loss": 2.7593,
"step": 6435
},
{
"epoch": 0.67,
"learning_rate": 2.5114378025495622e-05,
"loss": 2.7292,
"step": 6440
},
{
"epoch": 0.67,
"learning_rate": 2.504287139226684e-05,
"loss": 2.7821,
"step": 6445
},
{
"epoch": 0.67,
"learning_rate": 2.4971432675947986e-05,
"loss": 2.6211,
"step": 6450
},
{
"epoch": 0.67,
"learning_rate": 2.490006207094835e-05,
"loss": 2.7897,
"step": 6455
},
{
"epoch": 0.67,
"learning_rate": 2.4828759771491794e-05,
"loss": 2.7707,
"step": 6460
},
{
"epoch": 0.67,
"learning_rate": 2.4757525971616362e-05,
"loss": 2.6936,
"step": 6465
},
{
"epoch": 0.67,
"learning_rate": 2.468636086517368e-05,
"loss": 2.7017,
"step": 6470
},
{
"epoch": 0.67,
"learning_rate": 2.4615264645828385e-05,
"loss": 2.7199,
"step": 6475
},
{
"epoch": 0.67,
"learning_rate": 2.4544237507057723e-05,
"loss": 2.6177,
"step": 6480
},
{
"epoch": 0.67,
"learning_rate": 2.4473279642150864e-05,
"loss": 2.6708,
"step": 6485
},
{
"epoch": 0.67,
"learning_rate": 2.4402391244208527e-05,
"loss": 2.6982,
"step": 6490
},
{
"epoch": 0.67,
"learning_rate": 2.4331572506142392e-05,
"loss": 2.7325,
"step": 6495
},
{
"epoch": 0.68,
"learning_rate": 2.4260823620674494e-05,
"loss": 2.5804,
"step": 6500
},
{
"epoch": 0.68,
"learning_rate": 2.419014478033685e-05,
"loss": 2.7622,
"step": 6505
},
{
"epoch": 0.68,
"learning_rate": 2.411953617747083e-05,
"loss": 2.6657,
"step": 6510
},
{
"epoch": 0.68,
"learning_rate": 2.4048998004226675e-05,
"loss": 2.6913,
"step": 6515
},
{
"epoch": 0.68,
"learning_rate": 2.3978530452562974e-05,
"loss": 2.6726,
"step": 6520
},
{
"epoch": 0.68,
"learning_rate": 2.3908133714246078e-05,
"loss": 2.5888,
"step": 6525
},
{
"epoch": 0.68,
"learning_rate": 2.3837807980849696e-05,
"loss": 2.6843,
"step": 6530
},
{
"epoch": 0.68,
"learning_rate": 2.3767553443754243e-05,
"loss": 2.6587,
"step": 6535
},
{
"epoch": 0.68,
"learning_rate": 2.3697370294146444e-05,
"loss": 2.6843,
"step": 6540
},
{
"epoch": 0.68,
"learning_rate": 2.3627258723018737e-05,
"loss": 2.6966,
"step": 6545
},
{
"epoch": 0.68,
"learning_rate": 2.3557218921168735e-05,
"loss": 2.7045,
"step": 6550
},
{
"epoch": 0.68,
"learning_rate": 2.34872510791988e-05,
"loss": 2.749,
"step": 6555
},
{
"epoch": 0.68,
"learning_rate": 2.341735538751541e-05,
"loss": 2.6624,
"step": 6560
},
{
"epoch": 0.68,
"learning_rate": 2.3347532036328735e-05,
"loss": 2.6501,
"step": 6565
},
{
"epoch": 0.68,
"learning_rate": 2.327778121565209e-05,
"loss": 2.7085,
"step": 6570
},
{
"epoch": 0.68,
"learning_rate": 2.3208103115301354e-05,
"loss": 2.6656,
"step": 6575
},
{
"epoch": 0.68,
"learning_rate": 2.3138497924894558e-05,
"loss": 2.6336,
"step": 6580
},
{
"epoch": 0.68,
"learning_rate": 2.306896583385132e-05,
"loss": 2.5965,
"step": 6585
},
{
"epoch": 0.68,
"learning_rate": 2.2999507031392303e-05,
"loss": 2.6329,
"step": 6590
},
{
"epoch": 0.69,
"learning_rate": 2.2930121706538764e-05,
"loss": 2.7946,
"step": 6595
},
{
"epoch": 0.69,
"learning_rate": 2.2860810048111946e-05,
"loss": 2.7419,
"step": 6600
},
{
"epoch": 0.69,
"learning_rate": 2.279157224473269e-05,
"loss": 2.7096,
"step": 6605
},
{
"epoch": 0.69,
"learning_rate": 2.2722408484820767e-05,
"loss": 2.6848,
"step": 6610
},
{
"epoch": 0.69,
"learning_rate": 2.2653318956594526e-05,
"loss": 2.6628,
"step": 6615
},
{
"epoch": 0.69,
"learning_rate": 2.25843038480703e-05,
"loss": 2.7039,
"step": 6620
},
{
"epoch": 0.69,
"learning_rate": 2.2515363347061837e-05,
"loss": 2.6813,
"step": 6625
},
{
"epoch": 0.69,
"learning_rate": 2.2446497641179927e-05,
"loss": 2.7152,
"step": 6630
},
{
"epoch": 0.69,
"learning_rate": 2.2377706917831753e-05,
"loss": 2.7139,
"step": 6635
},
{
"epoch": 0.69,
"learning_rate": 2.2308991364220493e-05,
"loss": 2.7881,
"step": 6640
},
{
"epoch": 0.69,
"learning_rate": 2.2240351167344763e-05,
"loss": 2.6772,
"step": 6645
},
{
"epoch": 0.69,
"learning_rate": 2.2171786513998056e-05,
"loss": 2.7608,
"step": 6650
},
{
"epoch": 0.69,
"learning_rate": 2.2103297590768334e-05,
"loss": 2.6995,
"step": 6655
},
{
"epoch": 0.69,
"learning_rate": 2.2034884584037458e-05,
"loss": 2.6729,
"step": 6660
},
{
"epoch": 0.69,
"learning_rate": 2.1966547679980692e-05,
"loss": 2.7203,
"step": 6665
},
{
"epoch": 0.69,
"learning_rate": 2.1898287064566216e-05,
"loss": 2.6771,
"step": 6670
},
{
"epoch": 0.69,
"learning_rate": 2.1830102923554546e-05,
"loss": 2.7377,
"step": 6675
},
{
"epoch": 0.69,
"learning_rate": 2.176199544249817e-05,
"loss": 2.5989,
"step": 6680
},
{
"epoch": 0.69,
"learning_rate": 2.1693964806740868e-05,
"loss": 2.6636,
"step": 6685
},
{
"epoch": 0.7,
"learning_rate": 2.1626011201417363e-05,
"loss": 2.6873,
"step": 6690
},
{
"epoch": 0.7,
"learning_rate": 2.1558134811452745e-05,
"loss": 2.654,
"step": 6695
},
{
"epoch": 0.7,
"learning_rate": 2.1490335821561923e-05,
"loss": 2.6158,
"step": 6700
},
{
"epoch": 0.7,
"learning_rate": 2.142261441624926e-05,
"loss": 2.6667,
"step": 6705
},
{
"epoch": 0.7,
"learning_rate": 2.1354970779807882e-05,
"loss": 2.7006,
"step": 6710
},
{
"epoch": 0.7,
"learning_rate": 2.128740509631936e-05,
"loss": 2.572,
"step": 6715
},
{
"epoch": 0.7,
"learning_rate": 2.1219917549653123e-05,
"loss": 2.7011,
"step": 6720
},
{
"epoch": 0.7,
"learning_rate": 2.1152508323465897e-05,
"loss": 2.7622,
"step": 6725
},
{
"epoch": 0.7,
"learning_rate": 2.108517760120134e-05,
"loss": 2.7324,
"step": 6730
},
{
"epoch": 0.7,
"learning_rate": 2.1017925566089435e-05,
"loss": 2.6785,
"step": 6735
},
{
"epoch": 0.7,
"learning_rate": 2.095075240114606e-05,
"loss": 2.6959,
"step": 6740
},
{
"epoch": 0.7,
"learning_rate": 2.0883658289172464e-05,
"loss": 2.6875,
"step": 6745
},
{
"epoch": 0.7,
"learning_rate": 2.0816643412754694e-05,
"loss": 2.7088,
"step": 6750
},
{
"epoch": 0.7,
"learning_rate": 2.074970795426327e-05,
"loss": 2.7047,
"step": 6755
},
{
"epoch": 0.7,
"learning_rate": 2.0682852095852505e-05,
"loss": 2.7214,
"step": 6760
},
{
"epoch": 0.7,
"learning_rate": 2.061607601946014e-05,
"loss": 2.7047,
"step": 6765
},
{
"epoch": 0.7,
"learning_rate": 2.0549379906806815e-05,
"loss": 2.7401,
"step": 6770
},
{
"epoch": 0.7,
"learning_rate": 2.0482763939395495e-05,
"loss": 2.7122,
"step": 6775
},
{
"epoch": 0.7,
"learning_rate": 2.041622829851112e-05,
"loss": 2.6008,
"step": 6780
},
{
"epoch": 0.71,
"learning_rate": 2.0349773165219965e-05,
"loss": 2.695,
"step": 6785
},
{
"epoch": 0.71,
"learning_rate": 2.0283398720369267e-05,
"loss": 2.6169,
"step": 6790
},
{
"epoch": 0.71,
"learning_rate": 2.0217105144586678e-05,
"loss": 2.6313,
"step": 6795
},
{
"epoch": 0.71,
"learning_rate": 2.015089261827973e-05,
"loss": 2.7475,
"step": 6800
},
{
"epoch": 0.71,
"learning_rate": 2.0084761321635454e-05,
"loss": 2.6856,
"step": 6805
},
{
"epoch": 0.71,
"learning_rate": 2.001871143461978e-05,
"loss": 2.6443,
"step": 6810
},
{
"epoch": 0.71,
"learning_rate": 1.995274313697712e-05,
"loss": 2.5923,
"step": 6815
},
{
"epoch": 0.71,
"learning_rate": 1.9886856608229864e-05,
"loss": 2.7622,
"step": 6820
},
{
"epoch": 0.71,
"learning_rate": 1.9821052027677833e-05,
"loss": 2.61,
"step": 6825
},
{
"epoch": 0.71,
"learning_rate": 1.9755329574397897e-05,
"loss": 2.6933,
"step": 6830
},
{
"epoch": 0.71,
"learning_rate": 1.968968942724337e-05,
"loss": 2.5344,
"step": 6835
},
{
"epoch": 0.71,
"learning_rate": 1.962413176484362e-05,
"loss": 2.7351,
"step": 6840
},
{
"epoch": 0.71,
"learning_rate": 1.9558656765603583e-05,
"loss": 2.6866,
"step": 6845
},
{
"epoch": 0.71,
"learning_rate": 1.9493264607703138e-05,
"loss": 2.7705,
"step": 6850
},
{
"epoch": 0.71,
"learning_rate": 1.9427955469096825e-05,
"loss": 2.6399,
"step": 6855
},
{
"epoch": 0.71,
"learning_rate": 1.936272952751319e-05,
"loss": 2.6969,
"step": 6860
},
{
"epoch": 0.71,
"learning_rate": 1.9297586960454404e-05,
"loss": 2.6675,
"step": 6865
},
{
"epoch": 0.71,
"learning_rate": 1.9232527945195778e-05,
"loss": 2.7113,
"step": 6870
},
{
"epoch": 0.71,
"learning_rate": 1.9167552658785165e-05,
"loss": 2.763,
"step": 6875
},
{
"epoch": 0.71,
"learning_rate": 1.910266127804264e-05,
"loss": 2.7665,
"step": 6880
},
{
"epoch": 0.72,
"learning_rate": 1.9037853979559923e-05,
"loss": 2.7523,
"step": 6885
},
{
"epoch": 0.72,
"learning_rate": 1.8973130939699858e-05,
"loss": 2.5809,
"step": 6890
},
{
"epoch": 0.72,
"learning_rate": 1.8908492334596122e-05,
"loss": 2.598,
"step": 6895
},
{
"epoch": 0.72,
"learning_rate": 1.884393834015248e-05,
"loss": 2.684,
"step": 6900
},
{
"epoch": 0.72,
"learning_rate": 1.8779469132042534e-05,
"loss": 2.7288,
"step": 6905
},
{
"epoch": 0.72,
"learning_rate": 1.8715084885709084e-05,
"loss": 2.7266,
"step": 6910
},
{
"epoch": 0.72,
"learning_rate": 1.8650785776363772e-05,
"loss": 2.6357,
"step": 6915
},
{
"epoch": 0.72,
"learning_rate": 1.8586571978986562e-05,
"loss": 2.7422,
"step": 6920
},
{
"epoch": 0.72,
"learning_rate": 1.852244366832518e-05,
"loss": 2.6597,
"step": 6925
},
{
"epoch": 0.72,
"learning_rate": 1.8458401018894807e-05,
"loss": 2.7679,
"step": 6930
},
{
"epoch": 0.72,
"learning_rate": 1.8394444204977428e-05,
"loss": 2.7361,
"step": 6935
},
{
"epoch": 0.72,
"learning_rate": 1.833057340062149e-05,
"loss": 2.7658,
"step": 6940
},
{
"epoch": 0.72,
"learning_rate": 1.8266788779641396e-05,
"loss": 2.6302,
"step": 6945
},
{
"epoch": 0.72,
"learning_rate": 1.8203090515616946e-05,
"loss": 2.6127,
"step": 6950
},
{
"epoch": 0.72,
"learning_rate": 1.8139478781892977e-05,
"loss": 2.7266,
"step": 6955
},
{
"epoch": 0.72,
"learning_rate": 1.8075953751578872e-05,
"loss": 2.7178,
"step": 6960
},
{
"epoch": 0.72,
"learning_rate": 1.801251559754798e-05,
"loss": 2.6609,
"step": 6965
},
{
"epoch": 0.72,
"learning_rate": 1.7949164492437348e-05,
"loss": 2.7033,
"step": 6970
},
{
"epoch": 0.72,
"learning_rate": 1.7885900608647028e-05,
"loss": 2.7223,
"step": 6975
},
{
"epoch": 0.73,
"learning_rate": 1.782272411833978e-05,
"loss": 2.636,
"step": 6980
},
{
"epoch": 0.73,
"learning_rate": 1.7759635193440476e-05,
"loss": 2.6352,
"step": 6985
},
{
"epoch": 0.73,
"learning_rate": 1.7696634005635753e-05,
"loss": 2.7252,
"step": 6990
},
{
"epoch": 0.73,
"learning_rate": 1.7633720726373465e-05,
"loss": 2.6942,
"step": 6995
},
{
"epoch": 0.73,
"learning_rate": 1.7570895526862202e-05,
"loss": 2.6836,
"step": 7000
},
{
"epoch": 0.73,
"learning_rate": 1.7508158578070928e-05,
"loss": 2.7174,
"step": 7005
},
{
"epoch": 0.73,
"learning_rate": 1.7445510050728358e-05,
"loss": 2.6544,
"step": 7010
},
{
"epoch": 0.73,
"learning_rate": 1.738295011532266e-05,
"loss": 2.6737,
"step": 7015
},
{
"epoch": 0.73,
"learning_rate": 1.7320478942100894e-05,
"loss": 2.6655,
"step": 7020
},
{
"epoch": 0.73,
"learning_rate": 1.7258096701068522e-05,
"loss": 2.7223,
"step": 7025
},
{
"epoch": 0.73,
"learning_rate": 1.7195803561989048e-05,
"loss": 2.7321,
"step": 7030
},
{
"epoch": 0.73,
"learning_rate": 1.713359969438349e-05,
"loss": 2.6926,
"step": 7035
},
{
"epoch": 0.73,
"learning_rate": 1.7071485267529864e-05,
"loss": 2.6538,
"step": 7040
},
{
"epoch": 0.73,
"learning_rate": 1.70094604504629e-05,
"loss": 2.6991,
"step": 7045
},
{
"epoch": 0.73,
"learning_rate": 1.6947525411973364e-05,
"loss": 2.6074,
"step": 7050
},
{
"epoch": 0.73,
"learning_rate": 1.6885680320607776e-05,
"loss": 2.6396,
"step": 7055
},
{
"epoch": 0.73,
"learning_rate": 1.6823925344667818e-05,
"loss": 2.7287,
"step": 7060
},
{
"epoch": 0.73,
"learning_rate": 1.6762260652209983e-05,
"loss": 2.7771,
"step": 7065
},
{
"epoch": 0.73,
"learning_rate": 1.6700686411045075e-05,
"loss": 2.6666,
"step": 7070
},
{
"epoch": 0.74,
"learning_rate": 1.6639202788737694e-05,
"loss": 2.7717,
"step": 7075
},
{
"epoch": 0.74,
"learning_rate": 1.6577809952605904e-05,
"loss": 2.7856,
"step": 7080
},
{
"epoch": 0.74,
"learning_rate": 1.6516508069720653e-05,
"loss": 2.5967,
"step": 7085
},
{
"epoch": 0.74,
"learning_rate": 1.6455297306905404e-05,
"loss": 2.5617,
"step": 7090
},
{
"epoch": 0.74,
"learning_rate": 1.6394177830735668e-05,
"loss": 2.7749,
"step": 7095
},
{
"epoch": 0.74,
"learning_rate": 1.6333149807538473e-05,
"loss": 2.684,
"step": 7100
},
{
"epoch": 0.74,
"learning_rate": 1.6272213403392023e-05,
"loss": 2.6765,
"step": 7105
},
{
"epoch": 0.74,
"learning_rate": 1.6211368784125204e-05,
"loss": 2.6622,
"step": 7110
},
{
"epoch": 0.74,
"learning_rate": 1.6150616115317052e-05,
"loss": 2.6036,
"step": 7115
},
{
"epoch": 0.74,
"learning_rate": 1.6089955562296483e-05,
"loss": 2.8253,
"step": 7120
},
{
"epoch": 0.74,
"learning_rate": 1.6029387290141633e-05,
"loss": 2.6746,
"step": 7125
},
{
"epoch": 0.74,
"learning_rate": 1.596891146367958e-05,
"loss": 2.6799,
"step": 7130
},
{
"epoch": 0.74,
"learning_rate": 1.5908528247485767e-05,
"loss": 2.6718,
"step": 7135
},
{
"epoch": 0.74,
"learning_rate": 1.584823780588366e-05,
"loss": 2.7057,
"step": 7140
},
{
"epoch": 0.74,
"learning_rate": 1.5788040302944245e-05,
"loss": 2.6235,
"step": 7145
},
{
"epoch": 0.74,
"learning_rate": 1.5727935902485563e-05,
"loss": 2.6935,
"step": 7150
},
{
"epoch": 0.74,
"learning_rate": 1.5667924768072335e-05,
"loss": 2.472,
"step": 7155
},
{
"epoch": 0.74,
"learning_rate": 1.560800706301542e-05,
"loss": 2.6387,
"step": 7160
},
{
"epoch": 0.74,
"learning_rate": 1.554818295037146e-05,
"loss": 2.7737,
"step": 7165
},
{
"epoch": 0.75,
"learning_rate": 1.548845259294241e-05,
"loss": 2.7464,
"step": 7170
},
{
"epoch": 0.75,
"learning_rate": 1.5428816153275038e-05,
"loss": 2.714,
"step": 7175
},
{
"epoch": 0.75,
"learning_rate": 1.5369273793660573e-05,
"loss": 2.5639,
"step": 7180
},
{
"epoch": 0.75,
"learning_rate": 1.5309825676134217e-05,
"loss": 2.6844,
"step": 7185
},
{
"epoch": 0.75,
"learning_rate": 1.5250471962474656e-05,
"loss": 2.692,
"step": 7190
},
{
"epoch": 0.75,
"learning_rate": 1.5191212814203765e-05,
"loss": 2.6079,
"step": 7195
},
{
"epoch": 0.75,
"learning_rate": 1.5132048392585974e-05,
"loss": 2.7313,
"step": 7200
},
{
"epoch": 0.75,
"learning_rate": 1.5072978858628006e-05,
"loss": 2.7072,
"step": 7205
},
{
"epoch": 0.75,
"learning_rate": 1.5014004373078294e-05,
"loss": 2.6266,
"step": 7210
},
{
"epoch": 0.75,
"learning_rate": 1.4955125096426675e-05,
"loss": 2.6415,
"step": 7215
},
{
"epoch": 0.75,
"learning_rate": 1.4896341188903867e-05,
"loss": 2.6983,
"step": 7220
},
{
"epoch": 0.75,
"learning_rate": 1.4837652810481023e-05,
"loss": 2.6821,
"step": 7225
},
{
"epoch": 0.75,
"learning_rate": 1.4779060120869392e-05,
"loss": 2.7095,
"step": 7230
},
{
"epoch": 0.75,
"learning_rate": 1.4720563279519756e-05,
"loss": 2.5766,
"step": 7235
},
{
"epoch": 0.75,
"learning_rate": 1.4662162445622097e-05,
"loss": 2.7757,
"step": 7240
},
{
"epoch": 0.75,
"learning_rate": 1.4603857778105152e-05,
"loss": 2.6255,
"step": 7245
},
{
"epoch": 0.75,
"learning_rate": 1.4545649435635878e-05,
"loss": 2.6917,
"step": 7250
},
{
"epoch": 0.75,
"learning_rate": 1.4487537576619176e-05,
"loss": 2.7107,
"step": 7255
},
{
"epoch": 0.75,
"learning_rate": 1.4429522359197362e-05,
"loss": 2.6474,
"step": 7260
},
{
"epoch": 0.76,
"learning_rate": 1.43716039412497e-05,
"loss": 2.7187,
"step": 7265
},
{
"epoch": 0.76,
"learning_rate": 1.4313782480392147e-05,
"loss": 2.65,
"step": 7270
},
{
"epoch": 0.76,
"learning_rate": 1.4256058133976686e-05,
"loss": 2.7375,
"step": 7275
},
{
"epoch": 0.76,
"learning_rate": 1.41984310590911e-05,
"loss": 2.6253,
"step": 7280
},
{
"epoch": 0.76,
"learning_rate": 1.41409014125584e-05,
"loss": 2.68,
"step": 7285
},
{
"epoch": 0.76,
"learning_rate": 1.4083469350936513e-05,
"loss": 2.7554,
"step": 7290
},
{
"epoch": 0.76,
"learning_rate": 1.4026135030517796e-05,
"loss": 2.6835,
"step": 7295
},
{
"epoch": 0.76,
"learning_rate": 1.3968898607328573e-05,
"loss": 2.6024,
"step": 7300
},
{
"epoch": 0.76,
"learning_rate": 1.3911760237128829e-05,
"loss": 2.7322,
"step": 7305
},
{
"epoch": 0.76,
"learning_rate": 1.3854720075411637e-05,
"loss": 2.7088,
"step": 7310
},
{
"epoch": 0.76,
"learning_rate": 1.3797778277402862e-05,
"loss": 2.6111,
"step": 7315
},
{
"epoch": 0.76,
"learning_rate": 1.3740934998060694e-05,
"loss": 2.72,
"step": 7320
},
{
"epoch": 0.76,
"learning_rate": 1.3684190392075164e-05,
"loss": 2.6499,
"step": 7325
},
{
"epoch": 0.76,
"learning_rate": 1.3627544613867853e-05,
"loss": 2.6333,
"step": 7330
},
{
"epoch": 0.76,
"learning_rate": 1.3570997817591318e-05,
"loss": 2.7031,
"step": 7335
},
{
"epoch": 0.76,
"learning_rate": 1.3514550157128814e-05,
"loss": 2.6664,
"step": 7340
},
{
"epoch": 0.76,
"learning_rate": 1.3458201786093794e-05,
"loss": 2.5993,
"step": 7345
},
{
"epoch": 0.76,
"learning_rate": 1.3401952857829509e-05,
"loss": 2.7224,
"step": 7350
},
{
"epoch": 0.76,
"learning_rate": 1.3345803525408606e-05,
"loss": 2.7093,
"step": 7355
},
{
"epoch": 0.76,
"learning_rate": 1.3289753941632649e-05,
"loss": 2.5709,
"step": 7360
},
{
"epoch": 0.77,
"learning_rate": 1.3233804259031813e-05,
"loss": 2.7049,
"step": 7365
},
{
"epoch": 0.77,
"learning_rate": 1.3177954629864386e-05,
"loss": 2.7034,
"step": 7370
},
{
"epoch": 0.77,
"learning_rate": 1.3122205206116345e-05,
"loss": 2.5319,
"step": 7375
},
{
"epoch": 0.77,
"learning_rate": 1.306655613950103e-05,
"loss": 2.6428,
"step": 7380
},
{
"epoch": 0.77,
"learning_rate": 1.3011007581458611e-05,
"loss": 2.6509,
"step": 7385
},
{
"epoch": 0.77,
"learning_rate": 1.2955559683155799e-05,
"loss": 2.716,
"step": 7390
},
{
"epoch": 0.77,
"learning_rate": 1.2900212595485356e-05,
"loss": 2.5922,
"step": 7395
},
{
"epoch": 0.77,
"learning_rate": 1.2844966469065672e-05,
"loss": 2.6651,
"step": 7400
},
{
"epoch": 0.77,
"learning_rate": 1.2789821454240435e-05,
"loss": 2.7039,
"step": 7405
},
{
"epoch": 0.77,
"learning_rate": 1.2734777701078133e-05,
"loss": 2.7227,
"step": 7410
},
{
"epoch": 0.77,
"learning_rate": 1.2679835359371706e-05,
"loss": 2.7001,
"step": 7415
},
{
"epoch": 0.77,
"learning_rate": 1.2624994578638127e-05,
"loss": 2.5845,
"step": 7420
},
{
"epoch": 0.77,
"learning_rate": 1.2570255508117978e-05,
"loss": 2.7368,
"step": 7425
},
{
"epoch": 0.77,
"learning_rate": 1.2515618296775056e-05,
"loss": 2.7142,
"step": 7430
},
{
"epoch": 0.77,
"learning_rate": 1.246108309329594e-05,
"loss": 2.6,
"step": 7435
},
{
"epoch": 0.77,
"learning_rate": 1.2406650046089635e-05,
"loss": 2.6904,
"step": 7440
},
{
"epoch": 0.77,
"learning_rate": 1.2352319303287163e-05,
"loss": 2.6496,
"step": 7445
},
{
"epoch": 0.77,
"learning_rate": 1.229809101274108e-05,
"loss": 2.7377,
"step": 7450
},
{
"epoch": 0.77,
"learning_rate": 1.2243965322025202e-05,
"loss": 2.6886,
"step": 7455
},
{
"epoch": 0.78,
"learning_rate": 1.2189942378434083e-05,
"loss": 2.5865,
"step": 7460
},
{
"epoch": 0.78,
"learning_rate": 1.2136022328982693e-05,
"loss": 2.7591,
"step": 7465
},
{
"epoch": 0.78,
"learning_rate": 1.2082205320406004e-05,
"loss": 2.729,
"step": 7470
},
{
"epoch": 0.78,
"learning_rate": 1.2028491499158534e-05,
"loss": 2.698,
"step": 7475
},
{
"epoch": 0.78,
"learning_rate": 1.1974881011414046e-05,
"loss": 2.7502,
"step": 7480
},
{
"epoch": 0.78,
"learning_rate": 1.1921374003065044e-05,
"loss": 2.6928,
"step": 7485
},
{
"epoch": 0.78,
"learning_rate": 1.186797061972248e-05,
"loss": 2.6138,
"step": 7490
},
{
"epoch": 0.78,
"learning_rate": 1.1814671006715267e-05,
"loss": 2.6584,
"step": 7495
},
{
"epoch": 0.78,
"learning_rate": 1.1761475309089948e-05,
"loss": 2.6959,
"step": 7500
},
{
"epoch": 0.78,
"learning_rate": 1.1708383671610284e-05,
"loss": 2.4898,
"step": 7505
},
{
"epoch": 0.78,
"learning_rate": 1.1655396238756805e-05,
"loss": 2.686,
"step": 7510
},
{
"epoch": 0.78,
"learning_rate": 1.1602513154726512e-05,
"loss": 2.6382,
"step": 7515
},
{
"epoch": 0.78,
"learning_rate": 1.1549734563432436e-05,
"loss": 2.7187,
"step": 7520
},
{
"epoch": 0.78,
"learning_rate": 1.1497060608503202e-05,
"loss": 2.6762,
"step": 7525
},
{
"epoch": 0.78,
"learning_rate": 1.1444491433282739e-05,
"loss": 2.7233,
"step": 7530
},
{
"epoch": 0.78,
"learning_rate": 1.1392027180829795e-05,
"loss": 2.7589,
"step": 7535
},
{
"epoch": 0.78,
"learning_rate": 1.1339667993917602e-05,
"loss": 2.7376,
"step": 7540
},
{
"epoch": 0.78,
"learning_rate": 1.1287414015033493e-05,
"loss": 2.6907,
"step": 7545
},
{
"epoch": 0.78,
"learning_rate": 1.1235265386378446e-05,
"loss": 2.5883,
"step": 7550
},
{
"epoch": 0.79,
"learning_rate": 1.1183222249866793e-05,
"loss": 2.6426,
"step": 7555
},
{
"epoch": 0.79,
"learning_rate": 1.1131284747125747e-05,
"loss": 2.7127,
"step": 7560
},
{
"epoch": 0.79,
"learning_rate": 1.107945301949508e-05,
"loss": 2.6546,
"step": 7565
},
{
"epoch": 0.79,
"learning_rate": 1.1027727208026711e-05,
"loss": 2.721,
"step": 7570
},
{
"epoch": 0.79,
"learning_rate": 1.0976107453484314e-05,
"loss": 2.7973,
"step": 7575
},
{
"epoch": 0.79,
"learning_rate": 1.0924593896342977e-05,
"loss": 2.7185,
"step": 7580
},
{
"epoch": 0.79,
"learning_rate": 1.0873186676788726e-05,
"loss": 2.6295,
"step": 7585
},
{
"epoch": 0.79,
"learning_rate": 1.0821885934718262e-05,
"loss": 2.5328,
"step": 7590
},
{
"epoch": 0.79,
"learning_rate": 1.0770691809738514e-05,
"loss": 2.5976,
"step": 7595
},
{
"epoch": 0.79,
"learning_rate": 1.0719604441166247e-05,
"loss": 2.6423,
"step": 7600
},
{
"epoch": 0.79,
"learning_rate": 1.066862396802773e-05,
"loss": 2.7176,
"step": 7605
},
{
"epoch": 0.79,
"learning_rate": 1.0617750529058312e-05,
"loss": 2.6283,
"step": 7610
},
{
"epoch": 0.79,
"learning_rate": 1.0566984262702073e-05,
"loss": 2.6491,
"step": 7615
},
{
"epoch": 0.79,
"learning_rate": 1.0516325307111468e-05,
"loss": 2.6572,
"step": 7620
},
{
"epoch": 0.79,
"learning_rate": 1.0465773800146862e-05,
"loss": 2.6411,
"step": 7625
},
{
"epoch": 0.79,
"learning_rate": 1.0415329879376278e-05,
"loss": 2.725,
"step": 7630
},
{
"epoch": 0.79,
"learning_rate": 1.0364993682074915e-05,
"loss": 2.7082,
"step": 7635
},
{
"epoch": 0.79,
"learning_rate": 1.0314765345224831e-05,
"loss": 2.6866,
"step": 7640
},
{
"epoch": 0.79,
"learning_rate": 1.0264645005514584e-05,
"loss": 2.6563,
"step": 7645
},
{
"epoch": 0.8,
"learning_rate": 1.0214632799338802e-05,
"loss": 2.6094,
"step": 7650
},
{
"epoch": 0.8,
"learning_rate": 1.0164728862797878e-05,
"loss": 2.659,
"step": 7655
},
{
"epoch": 0.8,
"learning_rate": 1.0114933331697513e-05,
"loss": 2.6879,
"step": 7660
},
{
"epoch": 0.8,
"learning_rate": 1.006524634154845e-05,
"loss": 2.5692,
"step": 7665
},
{
"epoch": 0.8,
"learning_rate": 1.0015668027566056e-05,
"loss": 2.6626,
"step": 7670
},
{
"epoch": 0.8,
"learning_rate": 9.966198524669896e-06,
"loss": 2.6646,
"step": 7675
},
{
"epoch": 0.8,
"learning_rate": 9.91683796748349e-06,
"loss": 2.638,
"step": 7680
},
{
"epoch": 0.8,
"learning_rate": 9.86758649033383e-06,
"loss": 2.6332,
"step": 7685
},
{
"epoch": 0.8,
"learning_rate": 9.818444227251089e-06,
"loss": 2.6538,
"step": 7690
},
{
"epoch": 0.8,
"learning_rate": 9.769411311968247e-06,
"loss": 2.649,
"step": 7695
},
{
"epoch": 0.8,
"learning_rate": 9.720487877920658e-06,
"loss": 2.7121,
"step": 7700
},
{
"epoch": 0.8,
"learning_rate": 9.671674058245805e-06,
"loss": 2.6095,
"step": 7705
},
{
"epoch": 0.8,
"learning_rate": 9.622969985782803e-06,
"loss": 2.7791,
"step": 7710
},
{
"epoch": 0.8,
"learning_rate": 9.57437579307217e-06,
"loss": 2.5668,
"step": 7715
},
{
"epoch": 0.8,
"learning_rate": 9.525891612355364e-06,
"loss": 2.6692,
"step": 7720
},
{
"epoch": 0.8,
"learning_rate": 9.47751757557449e-06,
"loss": 2.5485,
"step": 7725
},
{
"epoch": 0.8,
"learning_rate": 9.429253814371897e-06,
"loss": 2.7023,
"step": 7730
},
{
"epoch": 0.8,
"learning_rate": 9.381100460089826e-06,
"loss": 2.7944,
"step": 7735
},
{
"epoch": 0.8,
"learning_rate": 9.333057643770083e-06,
"loss": 2.6942,
"step": 7740
},
{
"epoch": 0.8,
"learning_rate": 9.285125496153667e-06,
"loss": 2.7199,
"step": 7745
},
{
"epoch": 0.81,
"learning_rate": 9.237304147680375e-06,
"loss": 2.6637,
"step": 7750
},
{
"epoch": 0.81,
"learning_rate": 9.189593728488533e-06,
"loss": 2.6371,
"step": 7755
},
{
"epoch": 0.81,
"learning_rate": 9.141994368414524e-06,
"loss": 2.7325,
"step": 7760
},
{
"epoch": 0.81,
"learning_rate": 9.094506196992564e-06,
"loss": 2.7285,
"step": 7765
},
{
"epoch": 0.81,
"learning_rate": 9.047129343454268e-06,
"loss": 2.6496,
"step": 7770
},
{
"epoch": 0.81,
"learning_rate": 8.999863936728287e-06,
"loss": 2.6678,
"step": 7775
},
{
"epoch": 0.81,
"learning_rate": 8.952710105440027e-06,
"loss": 2.7479,
"step": 7780
},
{
"epoch": 0.81,
"learning_rate": 8.905667977911225e-06,
"loss": 2.655,
"step": 7785
},
{
"epoch": 0.81,
"learning_rate": 8.858737682159663e-06,
"loss": 2.7809,
"step": 7790
},
{
"epoch": 0.81,
"learning_rate": 8.811919345898772e-06,
"loss": 2.6838,
"step": 7795
},
{
"epoch": 0.81,
"learning_rate": 8.765213096537317e-06,
"loss": 2.6438,
"step": 7800
},
{
"epoch": 0.81,
"learning_rate": 8.718619061179028e-06,
"loss": 2.6506,
"step": 7805
},
{
"epoch": 0.81,
"learning_rate": 8.672137366622252e-06,
"loss": 2.6374,
"step": 7810
},
{
"epoch": 0.81,
"learning_rate": 8.625768139359631e-06,
"loss": 2.7031,
"step": 7815
},
{
"epoch": 0.81,
"learning_rate": 8.579511505577759e-06,
"loss": 2.672,
"step": 7820
},
{
"epoch": 0.81,
"learning_rate": 8.533367591156778e-06,
"loss": 2.762,
"step": 7825
},
{
"epoch": 0.81,
"learning_rate": 8.487336521670141e-06,
"loss": 2.7331,
"step": 7830
},
{
"epoch": 0.81,
"learning_rate": 8.441418422384151e-06,
"loss": 2.7531,
"step": 7835
},
{
"epoch": 0.81,
"learning_rate": 8.39561341825773e-06,
"loss": 2.6026,
"step": 7840
},
{
"epoch": 0.82,
"learning_rate": 8.349921633942021e-06,
"loss": 2.6804,
"step": 7845
},
{
"epoch": 0.82,
"learning_rate": 8.304343193780017e-06,
"loss": 2.6896,
"step": 7850
},
{
"epoch": 0.82,
"learning_rate": 8.258878221806326e-06,
"loss": 2.5729,
"step": 7855
},
{
"epoch": 0.82,
"learning_rate": 8.213526841746699e-06,
"loss": 2.6521,
"step": 7860
},
{
"epoch": 0.82,
"learning_rate": 8.168289177017824e-06,
"loss": 2.6304,
"step": 7865
},
{
"epoch": 0.82,
"learning_rate": 8.123165350726919e-06,
"loss": 2.6097,
"step": 7870
},
{
"epoch": 0.82,
"learning_rate": 8.078155485671357e-06,
"loss": 2.6809,
"step": 7875
},
{
"epoch": 0.82,
"learning_rate": 8.033259704338475e-06,
"loss": 2.715,
"step": 7880
},
{
"epoch": 0.82,
"learning_rate": 7.988478128905063e-06,
"loss": 2.6814,
"step": 7885
},
{
"epoch": 0.82,
"learning_rate": 7.943810881237163e-06,
"loss": 2.6426,
"step": 7890
},
{
"epoch": 0.82,
"learning_rate": 7.899258082889688e-06,
"loss": 2.7253,
"step": 7895
},
{
"epoch": 0.82,
"learning_rate": 7.854819855106078e-06,
"loss": 2.6656,
"step": 7900
},
{
"epoch": 0.82,
"learning_rate": 7.81049631881801e-06,
"loss": 2.6812,
"step": 7905
},
{
"epoch": 0.82,
"learning_rate": 7.766287594645017e-06,
"loss": 2.6192,
"step": 7910
},
{
"epoch": 0.82,
"learning_rate": 7.722193802894217e-06,
"loss": 2.6756,
"step": 7915
},
{
"epoch": 0.82,
"learning_rate": 7.678215063559956e-06,
"loss": 2.6087,
"step": 7920
},
{
"epoch": 0.82,
"learning_rate": 7.63435149632345e-06,
"loss": 2.5794,
"step": 7925
},
{
"epoch": 0.82,
"learning_rate": 7.590603220552539e-06,
"loss": 2.6084,
"step": 7930
},
{
"epoch": 0.82,
"learning_rate": 7.546970355301264e-06,
"loss": 2.76,
"step": 7935
},
{
"epoch": 0.83,
"learning_rate": 7.5034530193096475e-06,
"loss": 2.653,
"step": 7940
},
{
"epoch": 0.83,
"learning_rate": 7.460051331003287e-06,
"loss": 2.5695,
"step": 7945
},
{
"epoch": 0.83,
"learning_rate": 7.416765408493042e-06,
"loss": 2.6564,
"step": 7950
},
{
"epoch": 0.83,
"learning_rate": 7.373595369574804e-06,
"loss": 2.6538,
"step": 7955
},
{
"epoch": 0.83,
"learning_rate": 7.33054133172903e-06,
"loss": 2.6925,
"step": 7960
},
{
"epoch": 0.83,
"learning_rate": 7.287603412120531e-06,
"loss": 2.635,
"step": 7965
},
{
"epoch": 0.83,
"learning_rate": 7.244781727598137e-06,
"loss": 2.6893,
"step": 7970
},
{
"epoch": 0.83,
"learning_rate": 7.202076394694324e-06,
"loss": 2.6469,
"step": 7975
},
{
"epoch": 0.83,
"learning_rate": 7.159487529624975e-06,
"loss": 2.6708,
"step": 7980
},
{
"epoch": 0.83,
"learning_rate": 7.117015248288977e-06,
"loss": 2.7138,
"step": 7985
},
{
"epoch": 0.83,
"learning_rate": 7.074659666268002e-06,
"loss": 2.7588,
"step": 7990
},
{
"epoch": 0.83,
"learning_rate": 7.03242089882612e-06,
"loss": 2.702,
"step": 7995
},
{
"epoch": 0.83,
"learning_rate": 6.990299060909494e-06,
"loss": 2.5666,
"step": 8000
},
{
"epoch": 0.83,
"learning_rate": 6.948294267146121e-06,
"loss": 2.5574,
"step": 8005
},
{
"epoch": 0.83,
"learning_rate": 6.906406631845436e-06,
"loss": 2.5959,
"step": 8010
},
{
"epoch": 0.83,
"learning_rate": 6.86463626899807e-06,
"loss": 2.6754,
"step": 8015
},
{
"epoch": 0.83,
"learning_rate": 6.822983292275525e-06,
"loss": 2.6588,
"step": 8020
},
{
"epoch": 0.83,
"learning_rate": 6.781447815029818e-06,
"loss": 2.7039,
"step": 8025
},
{
"epoch": 0.83,
"learning_rate": 6.740029950293269e-06,
"loss": 2.714,
"step": 8030
},
{
"epoch": 0.84,
"learning_rate": 6.698729810778065e-06,
"loss": 2.6163,
"step": 8035
},
{
"epoch": 0.84,
"learning_rate": 6.657547508876078e-06,
"loss": 2.6831,
"step": 8040
},
{
"epoch": 0.84,
"learning_rate": 6.616483156658476e-06,
"loss": 2.5904,
"step": 8045
},
{
"epoch": 0.84,
"learning_rate": 6.575536865875443e-06,
"loss": 2.6415,
"step": 8050
},
{
"epoch": 0.84,
"learning_rate": 6.534708747955903e-06,
"loss": 2.6575,
"step": 8055
},
{
"epoch": 0.84,
"learning_rate": 6.493998914007149e-06,
"loss": 2.7303,
"step": 8060
},
{
"epoch": 0.84,
"learning_rate": 6.453407474814616e-06,
"loss": 2.8081,
"step": 8065
},
{
"epoch": 0.84,
"learning_rate": 6.412934540841553e-06,
"loss": 2.7649,
"step": 8070
},
{
"epoch": 0.84,
"learning_rate": 6.3725802222286805e-06,
"loss": 2.6959,
"step": 8075
},
{
"epoch": 0.84,
"learning_rate": 6.332344628793957e-06,
"loss": 2.6578,
"step": 8080
},
{
"epoch": 0.84,
"learning_rate": 6.292227870032225e-06,
"loss": 2.6482,
"step": 8085
},
{
"epoch": 0.84,
"learning_rate": 6.2522300551149605e-06,
"loss": 2.6967,
"step": 8090
},
{
"epoch": 0.84,
"learning_rate": 6.212351292889945e-06,
"loss": 2.658,
"step": 8095
},
{
"epoch": 0.84,
"learning_rate": 6.172591691880947e-06,
"loss": 2.6371,
"step": 8100
},
{
"epoch": 0.84,
"learning_rate": 6.1329513602875195e-06,
"loss": 2.7831,
"step": 8105
},
{
"epoch": 0.84,
"learning_rate": 6.093430405984585e-06,
"loss": 2.7056,
"step": 8110
},
{
"epoch": 0.84,
"learning_rate": 6.054028936522216e-06,
"loss": 2.6251,
"step": 8115
},
{
"epoch": 0.84,
"learning_rate": 6.014747059125353e-06,
"loss": 2.6027,
"step": 8120
},
{
"epoch": 0.84,
"learning_rate": 5.975584880693436e-06,
"loss": 2.746,
"step": 8125
},
{
"epoch": 0.84,
"learning_rate": 5.936542507800208e-06,
"loss": 2.6251,
"step": 8130
},
{
"epoch": 0.85,
"learning_rate": 5.8976200466933475e-06,
"loss": 2.6649,
"step": 8135
},
{
"epoch": 0.85,
"learning_rate": 5.8588176032942255e-06,
"loss": 2.6781,
"step": 8140
},
{
"epoch": 0.85,
"learning_rate": 5.820135283197625e-06,
"loss": 2.6556,
"step": 8145
},
{
"epoch": 0.85,
"learning_rate": 5.781573191671386e-06,
"loss": 2.6979,
"step": 8150
},
{
"epoch": 0.85,
"learning_rate": 5.743131433656207e-06,
"loss": 2.6225,
"step": 8155
},
{
"epoch": 0.85,
"learning_rate": 5.704810113765291e-06,
"loss": 2.7288,
"step": 8160
},
{
"epoch": 0.85,
"learning_rate": 5.666609336284096e-06,
"loss": 2.7051,
"step": 8165
},
{
"epoch": 0.85,
"learning_rate": 5.628529205170063e-06,
"loss": 2.615,
"step": 8170
},
{
"epoch": 0.85,
"learning_rate": 5.590569824052266e-06,
"loss": 2.6922,
"step": 8175
},
{
"epoch": 0.85,
"learning_rate": 5.552731296231239e-06,
"loss": 2.7271,
"step": 8180
},
{
"epoch": 0.85,
"learning_rate": 5.515013724678569e-06,
"loss": 2.5531,
"step": 8185
},
{
"epoch": 0.85,
"learning_rate": 5.477417212036723e-06,
"loss": 2.6463,
"step": 8190
},
{
"epoch": 0.85,
"learning_rate": 5.4399418606187195e-06,
"loss": 2.5999,
"step": 8195
},
{
"epoch": 0.85,
"learning_rate": 5.402587772407824e-06,
"loss": 2.7702,
"step": 8200
},
{
"epoch": 0.85,
"learning_rate": 5.365355049057358e-06,
"loss": 2.6789,
"step": 8205
},
{
"epoch": 0.85,
"learning_rate": 5.328243791890303e-06,
"loss": 2.728,
"step": 8210
},
{
"epoch": 0.85,
"learning_rate": 5.291254101899146e-06,
"loss": 2.6632,
"step": 8215
},
{
"epoch": 0.85,
"learning_rate": 5.254386079745527e-06,
"loss": 2.6963,
"step": 8220
},
{
"epoch": 0.85,
"learning_rate": 5.217639825759962e-06,
"loss": 2.7014,
"step": 8225
},
{
"epoch": 0.86,
"learning_rate": 5.181015439941645e-06,
"loss": 2.6466,
"step": 8230
},
{
"epoch": 0.86,
"learning_rate": 5.14451302195807e-06,
"loss": 2.6835,
"step": 8235
},
{
"epoch": 0.86,
"learning_rate": 5.108132671144849e-06,
"loss": 2.7248,
"step": 8240
},
{
"epoch": 0.86,
"learning_rate": 5.071874486505412e-06,
"loss": 2.6223,
"step": 8245
},
{
"epoch": 0.86,
"learning_rate": 5.0357385667106906e-06,
"loss": 2.63,
"step": 8250
},
{
"epoch": 0.86,
"learning_rate": 4.99972501009896e-06,
"loss": 2.6073,
"step": 8255
},
{
"epoch": 0.86,
"learning_rate": 4.963833914675431e-06,
"loss": 2.6914,
"step": 8260
},
{
"epoch": 0.86,
"learning_rate": 4.928065378112106e-06,
"loss": 2.6781,
"step": 8265
},
{
"epoch": 0.86,
"learning_rate": 4.892419497747447e-06,
"loss": 2.6237,
"step": 8270
},
{
"epoch": 0.86,
"learning_rate": 4.8568963705861114e-06,
"loss": 2.6735,
"step": 8275
},
{
"epoch": 0.86,
"learning_rate": 4.821496093298733e-06,
"loss": 2.5665,
"step": 8280
},
{
"epoch": 0.86,
"learning_rate": 4.786218762221595e-06,
"loss": 2.5667,
"step": 8285
},
{
"epoch": 0.86,
"learning_rate": 4.751064473356431e-06,
"loss": 2.627,
"step": 8290
},
{
"epoch": 0.86,
"learning_rate": 4.716033322370123e-06,
"loss": 2.6096,
"step": 8295
},
{
"epoch": 0.86,
"learning_rate": 4.6811254045944445e-06,
"loss": 2.6634,
"step": 8300
},
{
"epoch": 0.86,
"learning_rate": 4.646340815025824e-06,
"loss": 2.7446,
"step": 8305
},
{
"epoch": 0.86,
"learning_rate": 4.611679648325057e-06,
"loss": 2.6607,
"step": 8310
},
{
"epoch": 0.86,
"learning_rate": 4.577141998817075e-06,
"loss": 2.7073,
"step": 8315
},
{
"epoch": 0.86,
"learning_rate": 4.542727960490684e-06,
"loss": 2.6358,
"step": 8320
},
{
"epoch": 0.87,
"learning_rate": 4.508437626998269e-06,
"loss": 2.6461,
"step": 8325
},
{
"epoch": 0.87,
"learning_rate": 4.474271091655602e-06,
"loss": 2.601,
"step": 8330
},
{
"epoch": 0.87,
"learning_rate": 4.440228447441552e-06,
"loss": 2.598,
"step": 8335
},
{
"epoch": 0.87,
"learning_rate": 4.406309786997825e-06,
"loss": 2.7094,
"step": 8340
},
{
"epoch": 0.87,
"learning_rate": 4.372515202628758e-06,
"loss": 2.6154,
"step": 8345
},
{
"epoch": 0.87,
"learning_rate": 4.338844786300977e-06,
"loss": 2.7038,
"step": 8350
},
{
"epoch": 0.87,
"learning_rate": 4.3052986296432615e-06,
"loss": 2.4719,
"step": 8355
},
{
"epoch": 0.87,
"learning_rate": 4.271876823946203e-06,
"loss": 2.607,
"step": 8360
},
{
"epoch": 0.87,
"learning_rate": 4.238579460161996e-06,
"loss": 2.6718,
"step": 8365
},
{
"epoch": 0.87,
"learning_rate": 4.205406628904213e-06,
"loss": 2.7191,
"step": 8370
},
{
"epoch": 0.87,
"learning_rate": 4.172358420447492e-06,
"loss": 2.6425,
"step": 8375
},
{
"epoch": 0.87,
"learning_rate": 4.139434924727359e-06,
"loss": 2.7353,
"step": 8380
},
{
"epoch": 0.87,
"learning_rate": 4.106636231339922e-06,
"loss": 2.6488,
"step": 8385
},
{
"epoch": 0.87,
"learning_rate": 4.0739624295417e-06,
"loss": 2.6531,
"step": 8390
},
{
"epoch": 0.87,
"learning_rate": 4.041413608249312e-06,
"loss": 2.6685,
"step": 8395
},
{
"epoch": 0.87,
"learning_rate": 4.008989856039264e-06,
"loss": 2.6027,
"step": 8400
},
{
"epoch": 0.87,
"learning_rate": 3.976691261147714e-06,
"loss": 2.6912,
"step": 8405
},
{
"epoch": 0.87,
"learning_rate": 3.944517911470219e-06,
"loss": 2.7016,
"step": 8410
},
{
"epoch": 0.87,
"learning_rate": 3.9124698945615046e-06,
"loss": 2.7254,
"step": 8415
},
{
"epoch": 0.88,
"learning_rate": 3.880547297635234e-06,
"loss": 2.6916,
"step": 8420
},
{
"epoch": 0.88,
"learning_rate": 3.848750207563717e-06,
"loss": 2.7128,
"step": 8425
},
{
"epoch": 0.88,
"learning_rate": 3.817078710877775e-06,
"loss": 2.6661,
"step": 8430
},
{
"epoch": 0.88,
"learning_rate": 3.7855328937663914e-06,
"loss": 2.6837,
"step": 8435
},
{
"epoch": 0.88,
"learning_rate": 3.7541128420765734e-06,
"loss": 2.579,
"step": 8440
},
{
"epoch": 0.88,
"learning_rate": 3.7228186413130637e-06,
"loss": 2.6048,
"step": 8445
},
{
"epoch": 0.88,
"learning_rate": 3.6916503766381116e-06,
"loss": 2.6379,
"step": 8450
},
{
"epoch": 0.88,
"learning_rate": 3.6606081328712683e-06,
"loss": 2.672,
"step": 8455
},
{
"epoch": 0.88,
"learning_rate": 3.629691994489115e-06,
"loss": 2.6581,
"step": 8460
},
{
"epoch": 0.88,
"learning_rate": 3.5989020456250854e-06,
"loss": 2.6812,
"step": 8465
},
{
"epoch": 0.88,
"learning_rate": 3.5682383700691933e-06,
"loss": 2.6897,
"step": 8470
},
{
"epoch": 0.88,
"learning_rate": 3.5377010512678167e-06,
"loss": 2.7001,
"step": 8475
},
{
"epoch": 0.88,
"learning_rate": 3.507290172323474e-06,
"loss": 2.538,
"step": 8480
},
{
"epoch": 0.88,
"learning_rate": 3.4770058159946106e-06,
"loss": 2.5869,
"step": 8485
},
{
"epoch": 0.88,
"learning_rate": 3.4468480646953404e-06,
"loss": 2.7001,
"step": 8490
},
{
"epoch": 0.88,
"learning_rate": 3.4168170004952706e-06,
"loss": 2.7428,
"step": 8495
},
{
"epoch": 0.88,
"learning_rate": 3.3869127051192105e-06,
"loss": 2.5437,
"step": 8500
},
{
"epoch": 0.88,
"learning_rate": 3.3571352599470253e-06,
"loss": 2.6463,
"step": 8505
},
{
"epoch": 0.88,
"learning_rate": 3.3274847460133372e-06,
"loss": 2.5449,
"step": 8510
},
{
"epoch": 0.88,
"learning_rate": 3.2979612440073857e-06,
"loss": 2.6831,
"step": 8515
},
{
"epoch": 0.89,
"learning_rate": 3.2685648342727527e-06,
"loss": 2.5288,
"step": 8520
},
{
"epoch": 0.89,
"learning_rate": 3.2392955968071404e-06,
"loss": 2.6896,
"step": 8525
},
{
"epoch": 0.89,
"learning_rate": 3.2101536112622064e-06,
"loss": 2.5555,
"step": 8530
},
{
"epoch": 0.89,
"learning_rate": 3.1811389569432692e-06,
"loss": 2.6271,
"step": 8535
},
{
"epoch": 0.89,
"learning_rate": 3.1522517128091677e-06,
"loss": 2.6316,
"step": 8540
},
{
"epoch": 0.89,
"learning_rate": 3.1234919574720135e-06,
"loss": 2.7416,
"step": 8545
},
{
"epoch": 0.89,
"learning_rate": 3.094859769196956e-06,
"loss": 2.689,
"step": 8550
},
{
"epoch": 0.89,
"learning_rate": 3.0663552259020067e-06,
"loss": 2.6571,
"step": 8555
},
{
"epoch": 0.89,
"learning_rate": 3.0379784051578097e-06,
"loss": 2.7731,
"step": 8560
},
{
"epoch": 0.89,
"learning_rate": 3.009729384187432e-06,
"loss": 2.668,
"step": 8565
},
{
"epoch": 0.89,
"learning_rate": 2.981608239866157e-06,
"loss": 2.6377,
"step": 8570
},
{
"epoch": 0.89,
"learning_rate": 2.953615048721253e-06,
"loss": 2.7347,
"step": 8575
},
{
"epoch": 0.89,
"learning_rate": 2.925749886931817e-06,
"loss": 2.6793,
"step": 8580
},
{
"epoch": 0.89,
"learning_rate": 2.898012830328495e-06,
"loss": 2.7375,
"step": 8585
},
{
"epoch": 0.89,
"learning_rate": 2.8704039543933416e-06,
"loss": 2.6779,
"step": 8590
},
{
"epoch": 0.89,
"learning_rate": 2.8429233342595894e-06,
"loss": 2.595,
"step": 8595
},
{
"epoch": 0.89,
"learning_rate": 2.8155710447114236e-06,
"loss": 2.5455,
"step": 8600
},
{
"epoch": 0.89,
"learning_rate": 2.788347160183824e-06,
"loss": 2.6063,
"step": 8605
},
{
"epoch": 0.89,
"learning_rate": 2.761251754762295e-06,
"loss": 2.741,
"step": 8610
},
{
"epoch": 0.9,
"learning_rate": 2.7342849021827544e-06,
"loss": 2.6691,
"step": 8615
},
{
"epoch": 0.9,
"learning_rate": 2.70744667583126e-06,
"loss": 2.6946,
"step": 8620
},
{
"epoch": 0.9,
"learning_rate": 2.680737148743817e-06,
"loss": 2.6183,
"step": 8625
},
{
"epoch": 0.9,
"learning_rate": 2.654156393606222e-06,
"loss": 2.7315,
"step": 8630
},
{
"epoch": 0.9,
"learning_rate": 2.6277044827538346e-06,
"loss": 2.7589,
"step": 8635
},
{
"epoch": 0.9,
"learning_rate": 2.6013814881713784e-06,
"loss": 2.6948,
"step": 8640
},
{
"epoch": 0.9,
"learning_rate": 2.5751874814927522e-06,
"loss": 2.7164,
"step": 8645
},
{
"epoch": 0.9,
"learning_rate": 2.5491225340008306e-06,
"loss": 2.6754,
"step": 8650
},
{
"epoch": 0.9,
"learning_rate": 2.5231867166272905e-06,
"loss": 2.7137,
"step": 8655
},
{
"epoch": 0.9,
"learning_rate": 2.4973800999523788e-06,
"loss": 2.5459,
"step": 8660
},
{
"epoch": 0.9,
"learning_rate": 2.471702754204752e-06,
"loss": 2.7451,
"step": 8665
},
{
"epoch": 0.9,
"learning_rate": 2.4461547492612923e-06,
"loss": 2.7055,
"step": 8670
},
{
"epoch": 0.9,
"learning_rate": 2.4207361546468744e-06,
"loss": 2.6783,
"step": 8675
},
{
"epoch": 0.9,
"learning_rate": 2.395447039534221e-06,
"loss": 2.6795,
"step": 8680
},
{
"epoch": 0.9,
"learning_rate": 2.3702874727436876e-06,
"loss": 2.6799,
"step": 8685
},
{
"epoch": 0.9,
"learning_rate": 2.3452575227430885e-06,
"loss": 2.7297,
"step": 8690
},
{
"epoch": 0.9,
"learning_rate": 2.32035725764751e-06,
"loss": 2.697,
"step": 8695
},
{
"epoch": 0.9,
"learning_rate": 2.295586745219108e-06,
"loss": 2.6881,
"step": 8700
},
{
"epoch": 0.9,
"learning_rate": 2.2709460528669513e-06,
"loss": 2.6299,
"step": 8705
},
{
"epoch": 0.91,
"learning_rate": 2.246435247646822e-06,
"loss": 2.6045,
"step": 8710
},
{
"epoch": 0.91,
"learning_rate": 2.222054396261025e-06,
"loss": 2.6632,
"step": 8715
},
{
"epoch": 0.91,
"learning_rate": 2.197803565058232e-06,
"loss": 2.6285,
"step": 8720
},
{
"epoch": 0.91,
"learning_rate": 2.1736828200332625e-06,
"loss": 2.6616,
"step": 8725
},
{
"epoch": 0.91,
"learning_rate": 2.1496922268269493e-06,
"loss": 2.5761,
"step": 8730
},
{
"epoch": 0.91,
"learning_rate": 2.125831850725918e-06,
"loss": 2.6676,
"step": 8735
},
{
"epoch": 0.91,
"learning_rate": 2.1021017566624447e-06,
"loss": 2.6343,
"step": 8740
},
{
"epoch": 0.91,
"learning_rate": 2.0785020092142672e-06,
"loss": 2.5761,
"step": 8745
},
{
"epoch": 0.91,
"learning_rate": 2.0550326726043734e-06,
"loss": 2.6627,
"step": 8750
},
{
"epoch": 0.91,
"learning_rate": 2.031693810700902e-06,
"loss": 2.6591,
"step": 8755
},
{
"epoch": 0.91,
"learning_rate": 2.0084854870168868e-06,
"loss": 2.7405,
"step": 8760
},
{
"epoch": 0.91,
"learning_rate": 1.985407764710151e-06,
"loss": 2.5982,
"step": 8765
},
{
"epoch": 0.91,
"learning_rate": 1.962460706583097e-06,
"loss": 2.6419,
"step": 8770
},
{
"epoch": 0.91,
"learning_rate": 1.939644375082528e-06,
"loss": 2.6681,
"step": 8775
},
{
"epoch": 0.91,
"learning_rate": 1.9169588322995314e-06,
"loss": 2.6482,
"step": 8780
},
{
"epoch": 0.91,
"learning_rate": 1.8944041399692248e-06,
"loss": 2.6907,
"step": 8785
},
{
"epoch": 0.91,
"learning_rate": 1.871980359470693e-06,
"loss": 2.6878,
"step": 8790
},
{
"epoch": 0.91,
"learning_rate": 1.8496875518267286e-06,
"loss": 2.5733,
"step": 8795
},
{
"epoch": 0.91,
"learning_rate": 1.8275257777036981e-06,
"loss": 2.6682,
"step": 8800
},
{
"epoch": 0.92,
"learning_rate": 1.8054950974114137e-06,
"loss": 2.6664,
"step": 8805
},
{
"epoch": 0.92,
"learning_rate": 1.7835955709028907e-06,
"loss": 2.6685,
"step": 8810
},
{
"epoch": 0.92,
"learning_rate": 1.7618272577742733e-06,
"loss": 2.7395,
"step": 8815
},
{
"epoch": 0.92,
"learning_rate": 1.7401902172646034e-06,
"loss": 2.5314,
"step": 8820
},
{
"epoch": 0.92,
"learning_rate": 1.7186845082556914e-06,
"loss": 2.7153,
"step": 8825
},
{
"epoch": 0.92,
"learning_rate": 1.6973101892719556e-06,
"loss": 2.6866,
"step": 8830
},
{
"epoch": 0.92,
"learning_rate": 1.6760673184802401e-06,
"loss": 2.7431,
"step": 8835
},
{
"epoch": 0.92,
"learning_rate": 1.6549559536896964e-06,
"loss": 2.6594,
"step": 8840
},
{
"epoch": 0.92,
"learning_rate": 1.6339761523515907e-06,
"loss": 2.5953,
"step": 8845
},
{
"epoch": 0.92,
"learning_rate": 1.6131279715591584e-06,
"loss": 2.689,
"step": 8850
},
{
"epoch": 0.92,
"learning_rate": 1.59241146804745e-06,
"loss": 2.7079,
"step": 8855
},
{
"epoch": 0.92,
"learning_rate": 1.571826698193174e-06,
"loss": 2.61,
"step": 8860
},
{
"epoch": 0.92,
"learning_rate": 1.5513737180145705e-06,
"loss": 2.722,
"step": 8865
},
{
"epoch": 0.92,
"learning_rate": 1.5310525831711996e-06,
"loss": 2.7421,
"step": 8870
},
{
"epoch": 0.92,
"learning_rate": 1.510863348963848e-06,
"loss": 2.6717,
"step": 8875
},
{
"epoch": 0.92,
"learning_rate": 1.4908060703343495e-06,
"loss": 2.6859,
"step": 8880
},
{
"epoch": 0.92,
"learning_rate": 1.4708808018654374e-06,
"loss": 2.6463,
"step": 8885
},
{
"epoch": 0.92,
"learning_rate": 1.4510875977806092e-06,
"loss": 2.6017,
"step": 8890
},
{
"epoch": 0.92,
"learning_rate": 1.4314265119439618e-06,
"loss": 2.6356,
"step": 8895
},
{
"epoch": 0.92,
"learning_rate": 1.4118975978600513e-06,
"loss": 2.6506,
"step": 8900
},
{
"epoch": 0.93,
"learning_rate": 1.3925009086737661e-06,
"loss": 2.5969,
"step": 8905
},
{
"epoch": 0.93,
"learning_rate": 1.373236497170133e-06,
"loss": 2.6968,
"step": 8910
},
{
"epoch": 0.93,
"learning_rate": 1.3541044157742378e-06,
"loss": 2.6659,
"step": 8915
},
{
"epoch": 0.93,
"learning_rate": 1.3351047165510444e-06,
"loss": 2.6625,
"step": 8920
},
{
"epoch": 0.93,
"learning_rate": 1.3162374512052433e-06,
"loss": 2.6194,
"step": 8925
},
{
"epoch": 0.93,
"learning_rate": 1.297502671081141e-06,
"loss": 2.6823,
"step": 8930
},
{
"epoch": 0.93,
"learning_rate": 1.2789004271624884e-06,
"loss": 2.7428,
"step": 8935
},
{
"epoch": 0.93,
"learning_rate": 1.2604307700724017e-06,
"loss": 2.7062,
"step": 8940
},
{
"epoch": 0.93,
"learning_rate": 1.242093750073131e-06,
"loss": 2.7765,
"step": 8945
},
{
"epoch": 0.93,
"learning_rate": 1.2238894170660098e-06,
"loss": 2.7835,
"step": 8950
},
{
"epoch": 0.93,
"learning_rate": 1.2058178205912762e-06,
"loss": 2.5195,
"step": 8955
},
{
"epoch": 0.93,
"learning_rate": 1.1878790098279358e-06,
"loss": 2.7257,
"step": 8960
},
{
"epoch": 0.93,
"learning_rate": 1.170073033593655e-06,
"loss": 2.6542,
"step": 8965
},
{
"epoch": 0.93,
"learning_rate": 1.1523999403446007e-06,
"loss": 2.6421,
"step": 8970
},
{
"epoch": 0.93,
"learning_rate": 1.134859778175329e-06,
"loss": 2.7165,
"step": 8975
},
{
"epoch": 0.93,
"learning_rate": 1.1174525948186354e-06,
"loss": 2.5637,
"step": 8980
},
{
"epoch": 0.93,
"learning_rate": 1.1001784376454326e-06,
"loss": 2.6663,
"step": 8985
},
{
"epoch": 0.93,
"learning_rate": 1.0830373536646343e-06,
"loss": 2.6662,
"step": 8990
},
{
"epoch": 0.93,
"learning_rate": 1.0660293895230156e-06,
"loss": 2.5877,
"step": 8995
},
{
"epoch": 0.94,
"learning_rate": 1.0491545915050804e-06,
"loss": 2.7158,
"step": 9000
}
],
"logging_steps": 5,
"max_steps": 9622,
"num_train_epochs": 1,
"save_steps": 1000,
"total_flos": 1.348118192848896e+18,
"trial_name": null,
"trial_params": null
}