2560-stage-3-1024 / trainer_state.json
gowitheflow1998
upload checkpoint
ddf3694
{
"best_metric": 0.7822743590634694,
"best_model_checkpoint": "./checkpoints/clip-stage3pa-1024/checkpoint-3810",
"epoch": 0.7486430844095078,
"global_step": 4000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.5e-06,
"loss": 0.727,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 3e-06,
"loss": 0.6697,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 4.5e-06,
"loss": 0.6164,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 6e-06,
"loss": 0.5883,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 7.5e-06,
"loss": 0.5711,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 9e-06,
"loss": 0.5863,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 1.05e-05,
"loss": 0.5441,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 1.2e-05,
"loss": 0.5303,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 1.3500000000000001e-05,
"loss": 0.5436,
"step": 90
},
{
"epoch": 0.02,
"learning_rate": 1.5e-05,
"loss": 0.5507,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 1.65e-05,
"loss": 0.5534,
"step": 110
},
{
"epoch": 0.02,
"learning_rate": 1.8e-05,
"loss": 0.5384,
"step": 120
},
{
"epoch": 0.02,
"learning_rate": 1.95e-05,
"loss": 0.5533,
"step": 130
},
{
"epoch": 0.03,
"learning_rate": 2.1e-05,
"loss": 0.5475,
"step": 140
},
{
"epoch": 0.03,
"learning_rate": 2.25e-05,
"loss": 0.5482,
"step": 150
},
{
"epoch": 0.03,
"learning_rate": 2.4e-05,
"loss": 0.5099,
"step": 160
},
{
"epoch": 0.03,
"learning_rate": 2.55e-05,
"loss": 0.539,
"step": 170
},
{
"epoch": 0.03,
"learning_rate": 2.7000000000000002e-05,
"loss": 0.5176,
"step": 180
},
{
"epoch": 0.04,
"learning_rate": 2.8499999999999998e-05,
"loss": 0.526,
"step": 190
},
{
"epoch": 0.04,
"learning_rate": 3e-05,
"loss": 0.5276,
"step": 200
},
{
"epoch": 0.04,
"learning_rate": 2.992105263157895e-05,
"loss": 0.5077,
"step": 210
},
{
"epoch": 0.04,
"learning_rate": 2.9842105263157894e-05,
"loss": 0.5375,
"step": 220
},
{
"epoch": 0.04,
"learning_rate": 2.9763157894736842e-05,
"loss": 0.5411,
"step": 230
},
{
"epoch": 0.04,
"learning_rate": 2.968421052631579e-05,
"loss": 0.4877,
"step": 240
},
{
"epoch": 0.05,
"learning_rate": 2.9605263157894735e-05,
"loss": 0.5347,
"step": 250
},
{
"epoch": 0.05,
"learning_rate": 2.9526315789473684e-05,
"loss": 0.4839,
"step": 260
},
{
"epoch": 0.05,
"learning_rate": 2.9447368421052635e-05,
"loss": 0.4936,
"step": 270
},
{
"epoch": 0.05,
"learning_rate": 2.936842105263158e-05,
"loss": 0.4849,
"step": 280
},
{
"epoch": 0.05,
"learning_rate": 2.928947368421053e-05,
"loss": 0.4696,
"step": 290
},
{
"epoch": 0.06,
"learning_rate": 2.9210526315789474e-05,
"loss": 0.4872,
"step": 300
},
{
"epoch": 0.06,
"learning_rate": 2.9131578947368422e-05,
"loss": 0.4636,
"step": 310
},
{
"epoch": 0.06,
"learning_rate": 2.905263157894737e-05,
"loss": 0.4728,
"step": 320
},
{
"epoch": 0.06,
"learning_rate": 2.8973684210526315e-05,
"loss": 0.4743,
"step": 330
},
{
"epoch": 0.06,
"learning_rate": 2.8894736842105263e-05,
"loss": 0.4774,
"step": 340
},
{
"epoch": 0.07,
"learning_rate": 2.8815789473684212e-05,
"loss": 0.4372,
"step": 350
},
{
"epoch": 0.07,
"learning_rate": 2.8736842105263157e-05,
"loss": 0.4515,
"step": 360
},
{
"epoch": 0.07,
"learning_rate": 2.8657894736842105e-05,
"loss": 0.422,
"step": 370
},
{
"epoch": 0.07,
"learning_rate": 2.8578947368421053e-05,
"loss": 0.4505,
"step": 380
},
{
"epoch": 0.07,
"learning_rate": 2.8499999999999998e-05,
"loss": 0.4383,
"step": 390
},
{
"epoch": 0.07,
"learning_rate": 2.8421052631578946e-05,
"loss": 0.4601,
"step": 400
},
{
"epoch": 0.08,
"learning_rate": 2.8342105263157898e-05,
"loss": 0.4327,
"step": 410
},
{
"epoch": 0.08,
"learning_rate": 2.8263157894736843e-05,
"loss": 0.4182,
"step": 420
},
{
"epoch": 0.08,
"learning_rate": 2.818421052631579e-05,
"loss": 0.4421,
"step": 430
},
{
"epoch": 0.08,
"learning_rate": 2.810526315789474e-05,
"loss": 0.427,
"step": 440
},
{
"epoch": 0.08,
"learning_rate": 2.8026315789473685e-05,
"loss": 0.4397,
"step": 450
},
{
"epoch": 0.09,
"learning_rate": 2.7947368421052633e-05,
"loss": 0.4249,
"step": 460
},
{
"epoch": 0.09,
"learning_rate": 2.786842105263158e-05,
"loss": 0.432,
"step": 470
},
{
"epoch": 0.09,
"learning_rate": 2.7789473684210526e-05,
"loss": 0.4264,
"step": 480
},
{
"epoch": 0.09,
"learning_rate": 2.7710526315789474e-05,
"loss": 0.4325,
"step": 490
},
{
"epoch": 0.09,
"learning_rate": 2.7631578947368423e-05,
"loss": 0.4204,
"step": 500
},
{
"epoch": 0.1,
"learning_rate": 2.7552631578947368e-05,
"loss": 0.4246,
"step": 510
},
{
"epoch": 0.1,
"learning_rate": 2.7473684210526316e-05,
"loss": 0.429,
"step": 520
},
{
"epoch": 0.1,
"learning_rate": 2.739473684210526e-05,
"loss": 0.4003,
"step": 530
},
{
"epoch": 0.1,
"learning_rate": 2.7315789473684213e-05,
"loss": 0.3946,
"step": 540
},
{
"epoch": 0.1,
"learning_rate": 2.723684210526316e-05,
"loss": 0.4035,
"step": 550
},
{
"epoch": 0.1,
"learning_rate": 2.7157894736842106e-05,
"loss": 0.384,
"step": 560
},
{
"epoch": 0.11,
"learning_rate": 2.7078947368421054e-05,
"loss": 0.4095,
"step": 570
},
{
"epoch": 0.11,
"learning_rate": 2.7000000000000002e-05,
"loss": 0.386,
"step": 580
},
{
"epoch": 0.11,
"learning_rate": 2.6921052631578947e-05,
"loss": 0.374,
"step": 590
},
{
"epoch": 0.11,
"learning_rate": 2.6842105263157896e-05,
"loss": 0.4155,
"step": 600
},
{
"epoch": 0.11,
"learning_rate": 2.6763157894736844e-05,
"loss": 0.403,
"step": 610
},
{
"epoch": 0.12,
"learning_rate": 2.668421052631579e-05,
"loss": 0.3722,
"step": 620
},
{
"epoch": 0.12,
"learning_rate": 2.6605263157894737e-05,
"loss": 0.39,
"step": 630
},
{
"epoch": 0.12,
"learning_rate": 2.6526315789473685e-05,
"loss": 0.4041,
"step": 640
},
{
"epoch": 0.12,
"learning_rate": 2.644736842105263e-05,
"loss": 0.4011,
"step": 650
},
{
"epoch": 0.12,
"learning_rate": 2.636842105263158e-05,
"loss": 0.3887,
"step": 660
},
{
"epoch": 0.13,
"learning_rate": 2.6289473684210527e-05,
"loss": 0.3807,
"step": 670
},
{
"epoch": 0.13,
"learning_rate": 2.6210526315789475e-05,
"loss": 0.3762,
"step": 680
},
{
"epoch": 0.13,
"learning_rate": 2.6131578947368424e-05,
"loss": 0.3831,
"step": 690
},
{
"epoch": 0.13,
"learning_rate": 2.605263157894737e-05,
"loss": 0.3408,
"step": 700
},
{
"epoch": 0.13,
"learning_rate": 2.5973684210526317e-05,
"loss": 0.362,
"step": 710
},
{
"epoch": 0.13,
"learning_rate": 2.5894736842105265e-05,
"loss": 0.3606,
"step": 720
},
{
"epoch": 0.14,
"learning_rate": 2.581578947368421e-05,
"loss": 0.3475,
"step": 730
},
{
"epoch": 0.14,
"learning_rate": 2.5736842105263158e-05,
"loss": 0.3659,
"step": 740
},
{
"epoch": 0.14,
"learning_rate": 2.5657894736842107e-05,
"loss": 0.3391,
"step": 750
},
{
"epoch": 0.14,
"learning_rate": 2.557894736842105e-05,
"loss": 0.3744,
"step": 760
},
{
"epoch": 0.14,
"learning_rate": 2.55e-05,
"loss": 0.3609,
"step": 770
},
{
"epoch": 0.15,
"learning_rate": 2.5421052631578948e-05,
"loss": 0.3525,
"step": 780
},
{
"epoch": 0.15,
"learning_rate": 2.5342105263157893e-05,
"loss": 0.3527,
"step": 790
},
{
"epoch": 0.15,
"learning_rate": 2.526315789473684e-05,
"loss": 0.3566,
"step": 800
},
{
"epoch": 0.15,
"learning_rate": 2.518421052631579e-05,
"loss": 0.3327,
"step": 810
},
{
"epoch": 0.15,
"learning_rate": 2.5105263157894738e-05,
"loss": 0.3789,
"step": 820
},
{
"epoch": 0.16,
"learning_rate": 2.5026315789473686e-05,
"loss": 0.3415,
"step": 830
},
{
"epoch": 0.16,
"learning_rate": 2.4947368421052635e-05,
"loss": 0.3342,
"step": 840
},
{
"epoch": 0.16,
"learning_rate": 2.486842105263158e-05,
"loss": 0.3415,
"step": 850
},
{
"epoch": 0.16,
"learning_rate": 2.4789473684210528e-05,
"loss": 0.3562,
"step": 860
},
{
"epoch": 0.16,
"learning_rate": 2.4710526315789476e-05,
"loss": 0.3405,
"step": 870
},
{
"epoch": 0.16,
"learning_rate": 2.463157894736842e-05,
"loss": 0.368,
"step": 880
},
{
"epoch": 0.17,
"learning_rate": 2.455263157894737e-05,
"loss": 0.3547,
"step": 890
},
{
"epoch": 0.17,
"learning_rate": 2.4473684210526318e-05,
"loss": 0.3508,
"step": 900
},
{
"epoch": 0.17,
"learning_rate": 2.4394736842105262e-05,
"loss": 0.34,
"step": 910
},
{
"epoch": 0.17,
"learning_rate": 2.431578947368421e-05,
"loss": 0.3256,
"step": 920
},
{
"epoch": 0.17,
"learning_rate": 2.4236842105263156e-05,
"loss": 0.3515,
"step": 930
},
{
"epoch": 0.18,
"learning_rate": 2.4157894736842104e-05,
"loss": 0.316,
"step": 940
},
{
"epoch": 0.18,
"learning_rate": 2.4078947368421056e-05,
"loss": 0.3382,
"step": 950
},
{
"epoch": 0.18,
"learning_rate": 2.4e-05,
"loss": 0.3314,
"step": 960
},
{
"epoch": 0.18,
"learning_rate": 2.392105263157895e-05,
"loss": 0.3285,
"step": 970
},
{
"epoch": 0.18,
"learning_rate": 2.3842105263157897e-05,
"loss": 0.3472,
"step": 980
},
{
"epoch": 0.19,
"learning_rate": 2.3763157894736842e-05,
"loss": 0.3183,
"step": 990
},
{
"epoch": 0.19,
"learning_rate": 2.368421052631579e-05,
"loss": 0.332,
"step": 1000
},
{
"epoch": 0.19,
"learning_rate": 2.360526315789474e-05,
"loss": 0.314,
"step": 1010
},
{
"epoch": 0.19,
"learning_rate": 2.3526315789473684e-05,
"loss": 0.3127,
"step": 1020
},
{
"epoch": 0.19,
"learning_rate": 2.3447368421052632e-05,
"loss": 0.3483,
"step": 1030
},
{
"epoch": 0.19,
"learning_rate": 2.336842105263158e-05,
"loss": 0.3408,
"step": 1040
},
{
"epoch": 0.2,
"learning_rate": 2.3289473684210525e-05,
"loss": 0.3406,
"step": 1050
},
{
"epoch": 0.2,
"learning_rate": 2.3210526315789473e-05,
"loss": 0.3205,
"step": 1060
},
{
"epoch": 0.2,
"learning_rate": 2.3131578947368422e-05,
"loss": 0.3135,
"step": 1070
},
{
"epoch": 0.2,
"learning_rate": 2.3052631578947367e-05,
"loss": 0.3107,
"step": 1080
},
{
"epoch": 0.2,
"learning_rate": 2.297368421052632e-05,
"loss": 0.3079,
"step": 1090
},
{
"epoch": 0.21,
"learning_rate": 2.2894736842105263e-05,
"loss": 0.3141,
"step": 1100
},
{
"epoch": 0.21,
"learning_rate": 2.281578947368421e-05,
"loss": 0.3186,
"step": 1110
},
{
"epoch": 0.21,
"learning_rate": 2.273684210526316e-05,
"loss": 0.3254,
"step": 1120
},
{
"epoch": 0.21,
"learning_rate": 2.2657894736842105e-05,
"loss": 0.3219,
"step": 1130
},
{
"epoch": 0.21,
"learning_rate": 2.2578947368421053e-05,
"loss": 0.3106,
"step": 1140
},
{
"epoch": 0.22,
"learning_rate": 2.25e-05,
"loss": 0.3042,
"step": 1150
},
{
"epoch": 0.22,
"learning_rate": 2.2421052631578946e-05,
"loss": 0.3154,
"step": 1160
},
{
"epoch": 0.22,
"learning_rate": 2.2342105263157895e-05,
"loss": 0.3147,
"step": 1170
},
{
"epoch": 0.22,
"learning_rate": 2.2263157894736843e-05,
"loss": 0.2916,
"step": 1180
},
{
"epoch": 0.22,
"learning_rate": 2.2184210526315788e-05,
"loss": 0.3159,
"step": 1190
},
{
"epoch": 0.22,
"learning_rate": 2.2105263157894736e-05,
"loss": 0.3084,
"step": 1200
},
{
"epoch": 0.23,
"learning_rate": 2.2026315789473684e-05,
"loss": 0.3071,
"step": 1210
},
{
"epoch": 0.23,
"learning_rate": 2.1947368421052633e-05,
"loss": 0.312,
"step": 1220
},
{
"epoch": 0.23,
"learning_rate": 2.186842105263158e-05,
"loss": 0.3018,
"step": 1230
},
{
"epoch": 0.23,
"learning_rate": 2.178947368421053e-05,
"loss": 0.3195,
"step": 1240
},
{
"epoch": 0.23,
"learning_rate": 2.1710526315789474e-05,
"loss": 0.3115,
"step": 1250
},
{
"epoch": 0.24,
"learning_rate": 2.1631578947368423e-05,
"loss": 0.2827,
"step": 1260
},
{
"epoch": 0.24,
"learning_rate": 2.155263157894737e-05,
"loss": 0.3168,
"step": 1270
},
{
"epoch": 0.24,
"learning_rate": 2.1473684210526316e-05,
"loss": 0.3004,
"step": 1280
},
{
"epoch": 0.24,
"learning_rate": 2.1394736842105264e-05,
"loss": 0.3084,
"step": 1290
},
{
"epoch": 0.24,
"learning_rate": 2.1315789473684212e-05,
"loss": 0.3065,
"step": 1300
},
{
"epoch": 0.25,
"learning_rate": 2.1236842105263157e-05,
"loss": 0.318,
"step": 1310
},
{
"epoch": 0.25,
"learning_rate": 2.1157894736842106e-05,
"loss": 0.3035,
"step": 1320
},
{
"epoch": 0.25,
"learning_rate": 2.107894736842105e-05,
"loss": 0.2849,
"step": 1330
},
{
"epoch": 0.25,
"learning_rate": 2.1e-05,
"loss": 0.2861,
"step": 1340
},
{
"epoch": 0.25,
"learning_rate": 2.0921052631578947e-05,
"loss": 0.2852,
"step": 1350
},
{
"epoch": 0.25,
"learning_rate": 2.0842105263157895e-05,
"loss": 0.3204,
"step": 1360
},
{
"epoch": 0.26,
"learning_rate": 2.0763157894736844e-05,
"loss": 0.31,
"step": 1370
},
{
"epoch": 0.26,
"learning_rate": 2.0684210526315792e-05,
"loss": 0.2854,
"step": 1380
},
{
"epoch": 0.26,
"learning_rate": 2.0605263157894737e-05,
"loss": 0.2855,
"step": 1390
},
{
"epoch": 0.26,
"learning_rate": 2.0526315789473685e-05,
"loss": 0.3003,
"step": 1400
},
{
"epoch": 0.26,
"learning_rate": 2.0447368421052634e-05,
"loss": 0.2889,
"step": 1410
},
{
"epoch": 0.27,
"learning_rate": 2.036842105263158e-05,
"loss": 0.2834,
"step": 1420
},
{
"epoch": 0.27,
"learning_rate": 2.0289473684210527e-05,
"loss": 0.2696,
"step": 1430
},
{
"epoch": 0.27,
"learning_rate": 2.0210526315789475e-05,
"loss": 0.289,
"step": 1440
},
{
"epoch": 0.27,
"learning_rate": 2.013157894736842e-05,
"loss": 0.2851,
"step": 1450
},
{
"epoch": 0.27,
"learning_rate": 2.0052631578947368e-05,
"loss": 0.2903,
"step": 1460
},
{
"epoch": 0.28,
"learning_rate": 1.9973684210526317e-05,
"loss": 0.2742,
"step": 1470
},
{
"epoch": 0.28,
"learning_rate": 1.989473684210526e-05,
"loss": 0.2775,
"step": 1480
},
{
"epoch": 0.28,
"learning_rate": 1.9815789473684213e-05,
"loss": 0.2783,
"step": 1490
},
{
"epoch": 0.28,
"learning_rate": 1.9736842105263158e-05,
"loss": 0.2814,
"step": 1500
},
{
"epoch": 0.28,
"learning_rate": 1.9657894736842106e-05,
"loss": 0.2931,
"step": 1510
},
{
"epoch": 0.28,
"learning_rate": 1.9578947368421055e-05,
"loss": 0.2811,
"step": 1520
},
{
"epoch": 0.29,
"learning_rate": 1.95e-05,
"loss": 0.29,
"step": 1530
},
{
"epoch": 0.29,
"learning_rate": 1.9421052631578948e-05,
"loss": 0.2925,
"step": 1540
},
{
"epoch": 0.29,
"learning_rate": 1.9342105263157896e-05,
"loss": 0.2658,
"step": 1550
},
{
"epoch": 0.29,
"learning_rate": 1.926315789473684e-05,
"loss": 0.2959,
"step": 1560
},
{
"epoch": 0.29,
"learning_rate": 1.918421052631579e-05,
"loss": 0.2695,
"step": 1570
},
{
"epoch": 0.3,
"learning_rate": 1.9105263157894738e-05,
"loss": 0.2902,
"step": 1580
},
{
"epoch": 0.3,
"learning_rate": 1.9026315789473683e-05,
"loss": 0.2597,
"step": 1590
},
{
"epoch": 0.3,
"learning_rate": 1.894736842105263e-05,
"loss": 0.2745,
"step": 1600
},
{
"epoch": 0.3,
"learning_rate": 1.886842105263158e-05,
"loss": 0.2775,
"step": 1610
},
{
"epoch": 0.3,
"learning_rate": 1.8789473684210524e-05,
"loss": 0.2707,
"step": 1620
},
{
"epoch": 0.31,
"learning_rate": 1.8710526315789476e-05,
"loss": 0.2699,
"step": 1630
},
{
"epoch": 0.31,
"learning_rate": 1.8631578947368424e-05,
"loss": 0.2762,
"step": 1640
},
{
"epoch": 0.31,
"learning_rate": 1.855263157894737e-05,
"loss": 0.2539,
"step": 1650
},
{
"epoch": 0.31,
"learning_rate": 1.8473684210526317e-05,
"loss": 0.273,
"step": 1660
},
{
"epoch": 0.31,
"learning_rate": 1.8394736842105266e-05,
"loss": 0.244,
"step": 1670
},
{
"epoch": 0.31,
"learning_rate": 1.831578947368421e-05,
"loss": 0.2632,
"step": 1680
},
{
"epoch": 0.32,
"learning_rate": 1.823684210526316e-05,
"loss": 0.2756,
"step": 1690
},
{
"epoch": 0.32,
"learning_rate": 1.8157894736842107e-05,
"loss": 0.2555,
"step": 1700
},
{
"epoch": 0.32,
"learning_rate": 1.8078947368421052e-05,
"loss": 0.2863,
"step": 1710
},
{
"epoch": 0.32,
"learning_rate": 1.8e-05,
"loss": 0.2516,
"step": 1720
},
{
"epoch": 0.32,
"learning_rate": 1.7921052631578945e-05,
"loss": 0.2467,
"step": 1730
},
{
"epoch": 0.33,
"learning_rate": 1.7842105263157894e-05,
"loss": 0.2591,
"step": 1740
},
{
"epoch": 0.33,
"learning_rate": 1.7763157894736842e-05,
"loss": 0.2641,
"step": 1750
},
{
"epoch": 0.33,
"learning_rate": 1.7684210526315787e-05,
"loss": 0.2658,
"step": 1760
},
{
"epoch": 0.33,
"learning_rate": 1.760526315789474e-05,
"loss": 0.252,
"step": 1770
},
{
"epoch": 0.33,
"learning_rate": 1.7526315789473687e-05,
"loss": 0.2654,
"step": 1780
},
{
"epoch": 0.34,
"learning_rate": 1.7447368421052632e-05,
"loss": 0.2723,
"step": 1790
},
{
"epoch": 0.34,
"learning_rate": 1.736842105263158e-05,
"loss": 0.2716,
"step": 1800
},
{
"epoch": 0.34,
"learning_rate": 1.728947368421053e-05,
"loss": 0.2741,
"step": 1810
},
{
"epoch": 0.34,
"learning_rate": 1.7210526315789473e-05,
"loss": 0.2708,
"step": 1820
},
{
"epoch": 0.34,
"learning_rate": 1.713157894736842e-05,
"loss": 0.2484,
"step": 1830
},
{
"epoch": 0.34,
"learning_rate": 1.705263157894737e-05,
"loss": 0.2615,
"step": 1840
},
{
"epoch": 0.35,
"learning_rate": 1.6973684210526315e-05,
"loss": 0.2547,
"step": 1850
},
{
"epoch": 0.35,
"learning_rate": 1.6894736842105263e-05,
"loss": 0.264,
"step": 1860
},
{
"epoch": 0.35,
"learning_rate": 1.681578947368421e-05,
"loss": 0.2514,
"step": 1870
},
{
"epoch": 0.35,
"learning_rate": 1.6736842105263156e-05,
"loss": 0.2385,
"step": 1880
},
{
"epoch": 0.35,
"learning_rate": 1.6657894736842105e-05,
"loss": 0.2539,
"step": 1890
},
{
"epoch": 0.36,
"learning_rate": 1.6578947368421053e-05,
"loss": 0.24,
"step": 1900
},
{
"epoch": 0.36,
"learning_rate": 1.65e-05,
"loss": 0.2509,
"step": 1910
},
{
"epoch": 0.36,
"learning_rate": 1.642105263157895e-05,
"loss": 0.2573,
"step": 1920
},
{
"epoch": 0.36,
"learning_rate": 1.6342105263157894e-05,
"loss": 0.2427,
"step": 1930
},
{
"epoch": 0.36,
"learning_rate": 1.6263157894736843e-05,
"loss": 0.2572,
"step": 1940
},
{
"epoch": 0.36,
"learning_rate": 1.618421052631579e-05,
"loss": 0.2522,
"step": 1950
},
{
"epoch": 0.37,
"learning_rate": 1.6105263157894736e-05,
"loss": 0.2553,
"step": 1960
},
{
"epoch": 0.37,
"learning_rate": 1.6026315789473684e-05,
"loss": 0.2436,
"step": 1970
},
{
"epoch": 0.37,
"learning_rate": 1.5947368421052633e-05,
"loss": 0.2572,
"step": 1980
},
{
"epoch": 0.37,
"learning_rate": 1.5868421052631578e-05,
"loss": 0.2541,
"step": 1990
},
{
"epoch": 0.37,
"learning_rate": 1.5789473684210526e-05,
"loss": 0.2417,
"step": 2000
},
{
"epoch": 0.38,
"learning_rate": 1.5710526315789474e-05,
"loss": 0.2586,
"step": 2010
},
{
"epoch": 0.38,
"learning_rate": 1.563157894736842e-05,
"loss": 0.2383,
"step": 2020
},
{
"epoch": 0.38,
"learning_rate": 1.5552631578947367e-05,
"loss": 0.2497,
"step": 2030
},
{
"epoch": 0.38,
"learning_rate": 1.547368421052632e-05,
"loss": 0.2357,
"step": 2040
},
{
"epoch": 0.38,
"learning_rate": 1.5394736842105264e-05,
"loss": 0.26,
"step": 2050
},
{
"epoch": 0.39,
"learning_rate": 1.5315789473684212e-05,
"loss": 0.2442,
"step": 2060
},
{
"epoch": 0.39,
"learning_rate": 1.5236842105263159e-05,
"loss": 0.2269,
"step": 2070
},
{
"epoch": 0.39,
"learning_rate": 1.5157894736842105e-05,
"loss": 0.2419,
"step": 2080
},
{
"epoch": 0.39,
"learning_rate": 1.5078947368421054e-05,
"loss": 0.2442,
"step": 2090
},
{
"epoch": 0.39,
"learning_rate": 1.5e-05,
"loss": 0.2331,
"step": 2100
},
{
"epoch": 0.39,
"learning_rate": 1.4921052631578947e-05,
"loss": 0.2311,
"step": 2110
},
{
"epoch": 0.4,
"learning_rate": 1.4842105263157895e-05,
"loss": 0.2444,
"step": 2120
},
{
"epoch": 0.4,
"learning_rate": 1.4763157894736842e-05,
"loss": 0.2409,
"step": 2130
},
{
"epoch": 0.4,
"learning_rate": 1.468421052631579e-05,
"loss": 0.2365,
"step": 2140
},
{
"epoch": 0.4,
"learning_rate": 1.4605263157894737e-05,
"loss": 0.2517,
"step": 2150
},
{
"epoch": 0.4,
"learning_rate": 1.4526315789473685e-05,
"loss": 0.2562,
"step": 2160
},
{
"epoch": 0.41,
"learning_rate": 1.4447368421052632e-05,
"loss": 0.2233,
"step": 2170
},
{
"epoch": 0.41,
"learning_rate": 1.4368421052631578e-05,
"loss": 0.2496,
"step": 2180
},
{
"epoch": 0.41,
"learning_rate": 1.4289473684210527e-05,
"loss": 0.2452,
"step": 2190
},
{
"epoch": 0.41,
"learning_rate": 1.4210526315789473e-05,
"loss": 0.2374,
"step": 2200
},
{
"epoch": 0.41,
"learning_rate": 1.4131578947368422e-05,
"loss": 0.2233,
"step": 2210
},
{
"epoch": 0.42,
"learning_rate": 1.405263157894737e-05,
"loss": 0.2571,
"step": 2220
},
{
"epoch": 0.42,
"learning_rate": 1.3973684210526316e-05,
"loss": 0.2254,
"step": 2230
},
{
"epoch": 0.42,
"learning_rate": 1.3894736842105263e-05,
"loss": 0.2236,
"step": 2240
},
{
"epoch": 0.42,
"learning_rate": 1.3815789473684211e-05,
"loss": 0.2284,
"step": 2250
},
{
"epoch": 0.42,
"learning_rate": 1.3736842105263158e-05,
"loss": 0.232,
"step": 2260
},
{
"epoch": 0.42,
"learning_rate": 1.3657894736842106e-05,
"loss": 0.2415,
"step": 2270
},
{
"epoch": 0.43,
"learning_rate": 1.3578947368421053e-05,
"loss": 0.2244,
"step": 2280
},
{
"epoch": 0.43,
"learning_rate": 1.3500000000000001e-05,
"loss": 0.2338,
"step": 2290
},
{
"epoch": 0.43,
"learning_rate": 1.3421052631578948e-05,
"loss": 0.2382,
"step": 2300
},
{
"epoch": 0.43,
"learning_rate": 1.3342105263157894e-05,
"loss": 0.2349,
"step": 2310
},
{
"epoch": 0.43,
"learning_rate": 1.3263157894736843e-05,
"loss": 0.2267,
"step": 2320
},
{
"epoch": 0.44,
"learning_rate": 1.318421052631579e-05,
"loss": 0.2426,
"step": 2330
},
{
"epoch": 0.44,
"learning_rate": 1.3105263157894738e-05,
"loss": 0.238,
"step": 2340
},
{
"epoch": 0.44,
"learning_rate": 1.3026315789473684e-05,
"loss": 0.2332,
"step": 2350
},
{
"epoch": 0.44,
"learning_rate": 1.2947368421052633e-05,
"loss": 0.2332,
"step": 2360
},
{
"epoch": 0.44,
"learning_rate": 1.2868421052631579e-05,
"loss": 0.2477,
"step": 2370
},
{
"epoch": 0.45,
"learning_rate": 1.2789473684210526e-05,
"loss": 0.2176,
"step": 2380
},
{
"epoch": 0.45,
"learning_rate": 1.2710526315789474e-05,
"loss": 0.2177,
"step": 2390
},
{
"epoch": 0.45,
"learning_rate": 1.263157894736842e-05,
"loss": 0.2278,
"step": 2400
},
{
"epoch": 0.45,
"learning_rate": 1.2552631578947369e-05,
"loss": 0.2221,
"step": 2410
},
{
"epoch": 0.45,
"learning_rate": 1.2473684210526317e-05,
"loss": 0.2173,
"step": 2420
},
{
"epoch": 0.45,
"learning_rate": 1.2394736842105264e-05,
"loss": 0.2131,
"step": 2430
},
{
"epoch": 0.46,
"learning_rate": 1.231578947368421e-05,
"loss": 0.2346,
"step": 2440
},
{
"epoch": 0.46,
"learning_rate": 1.2236842105263159e-05,
"loss": 0.2301,
"step": 2450
},
{
"epoch": 0.46,
"learning_rate": 1.2157894736842105e-05,
"loss": 0.2173,
"step": 2460
},
{
"epoch": 0.46,
"learning_rate": 1.2078947368421052e-05,
"loss": 0.2297,
"step": 2470
},
{
"epoch": 0.46,
"learning_rate": 1.2e-05,
"loss": 0.2255,
"step": 2480
},
{
"epoch": 0.47,
"learning_rate": 1.1921052631578949e-05,
"loss": 0.2307,
"step": 2490
},
{
"epoch": 0.47,
"learning_rate": 1.1842105263157895e-05,
"loss": 0.222,
"step": 2500
},
{
"epoch": 0.47,
"learning_rate": 1.1763157894736842e-05,
"loss": 0.2319,
"step": 2510
},
{
"epoch": 0.47,
"learning_rate": 1.168421052631579e-05,
"loss": 0.228,
"step": 2520
},
{
"epoch": 0.47,
"learning_rate": 1.1605263157894737e-05,
"loss": 0.2267,
"step": 2530
},
{
"epoch": 0.48,
"learning_rate": 1.1526315789473683e-05,
"loss": 0.2163,
"step": 2540
},
{
"epoch": 0.48,
"learning_rate": 1.1447368421052632e-05,
"loss": 0.2167,
"step": 2550
},
{
"epoch": 0.48,
"learning_rate": 1.136842105263158e-05,
"loss": 0.1991,
"step": 2560
},
{
"epoch": 0.48,
"learning_rate": 1.1289473684210527e-05,
"loss": 0.2165,
"step": 2570
},
{
"epoch": 0.48,
"learning_rate": 1.1210526315789473e-05,
"loss": 0.2147,
"step": 2580
},
{
"epoch": 0.48,
"learning_rate": 1.1131578947368421e-05,
"loss": 0.2203,
"step": 2590
},
{
"epoch": 0.49,
"learning_rate": 1.1052631578947368e-05,
"loss": 0.2253,
"step": 2600
},
{
"epoch": 0.49,
"learning_rate": 1.0973684210526316e-05,
"loss": 0.2326,
"step": 2610
},
{
"epoch": 0.49,
"learning_rate": 1.0894736842105265e-05,
"loss": 0.221,
"step": 2620
},
{
"epoch": 0.49,
"learning_rate": 1.0815789473684211e-05,
"loss": 0.2233,
"step": 2630
},
{
"epoch": 0.49,
"learning_rate": 1.0736842105263158e-05,
"loss": 0.2199,
"step": 2640
},
{
"epoch": 0.5,
"learning_rate": 1.0657894736842106e-05,
"loss": 0.2077,
"step": 2650
},
{
"epoch": 0.5,
"learning_rate": 1.0578947368421053e-05,
"loss": 0.2108,
"step": 2660
},
{
"epoch": 0.5,
"learning_rate": 1.05e-05,
"loss": 0.2217,
"step": 2670
},
{
"epoch": 0.5,
"learning_rate": 1.0421052631578948e-05,
"loss": 0.2067,
"step": 2680
},
{
"epoch": 0.5,
"learning_rate": 1.0342105263157896e-05,
"loss": 0.2351,
"step": 2690
},
{
"epoch": 0.51,
"learning_rate": 1.0263157894736843e-05,
"loss": 0.1952,
"step": 2700
},
{
"epoch": 0.51,
"learning_rate": 1.018421052631579e-05,
"loss": 0.2259,
"step": 2710
},
{
"epoch": 0.51,
"learning_rate": 1.0105263157894738e-05,
"loss": 0.2074,
"step": 2720
},
{
"epoch": 0.51,
"learning_rate": 1.0026315789473684e-05,
"loss": 0.1963,
"step": 2730
},
{
"epoch": 0.51,
"learning_rate": 9.94736842105263e-06,
"loss": 0.2193,
"step": 2740
},
{
"epoch": 0.51,
"learning_rate": 9.868421052631579e-06,
"loss": 0.2172,
"step": 2750
},
{
"epoch": 0.52,
"learning_rate": 9.789473684210527e-06,
"loss": 0.2278,
"step": 2760
},
{
"epoch": 0.52,
"learning_rate": 9.710526315789474e-06,
"loss": 0.1933,
"step": 2770
},
{
"epoch": 0.52,
"learning_rate": 9.63157894736842e-06,
"loss": 0.2177,
"step": 2780
},
{
"epoch": 0.52,
"learning_rate": 9.552631578947369e-06,
"loss": 0.204,
"step": 2790
},
{
"epoch": 0.52,
"learning_rate": 9.473684210526315e-06,
"loss": 0.2178,
"step": 2800
},
{
"epoch": 0.53,
"learning_rate": 9.394736842105262e-06,
"loss": 0.2012,
"step": 2810
},
{
"epoch": 0.53,
"learning_rate": 9.315789473684212e-06,
"loss": 0.2054,
"step": 2820
},
{
"epoch": 0.53,
"learning_rate": 9.236842105263159e-06,
"loss": 0.2067,
"step": 2830
},
{
"epoch": 0.53,
"learning_rate": 9.157894736842105e-06,
"loss": 0.2132,
"step": 2840
},
{
"epoch": 0.53,
"learning_rate": 9.078947368421054e-06,
"loss": 0.2081,
"step": 2850
},
{
"epoch": 0.54,
"learning_rate": 9e-06,
"loss": 0.2033,
"step": 2860
},
{
"epoch": 0.54,
"learning_rate": 8.921052631578947e-06,
"loss": 0.2158,
"step": 2870
},
{
"epoch": 0.54,
"learning_rate": 8.842105263157893e-06,
"loss": 0.2043,
"step": 2880
},
{
"epoch": 0.54,
"learning_rate": 8.763157894736843e-06,
"loss": 0.1979,
"step": 2890
},
{
"epoch": 0.54,
"learning_rate": 8.68421052631579e-06,
"loss": 0.213,
"step": 2900
},
{
"epoch": 0.54,
"learning_rate": 8.605263157894737e-06,
"loss": 0.2042,
"step": 2910
},
{
"epoch": 0.55,
"learning_rate": 8.526315789473685e-06,
"loss": 0.2067,
"step": 2920
},
{
"epoch": 0.55,
"learning_rate": 8.447368421052632e-06,
"loss": 0.2052,
"step": 2930
},
{
"epoch": 0.55,
"learning_rate": 8.368421052631578e-06,
"loss": 0.2036,
"step": 2940
},
{
"epoch": 0.55,
"learning_rate": 8.289473684210526e-06,
"loss": 0.2224,
"step": 2950
},
{
"epoch": 0.55,
"learning_rate": 8.210526315789475e-06,
"loss": 0.2106,
"step": 2960
},
{
"epoch": 0.56,
"learning_rate": 8.131578947368421e-06,
"loss": 0.1956,
"step": 2970
},
{
"epoch": 0.56,
"learning_rate": 8.052631578947368e-06,
"loss": 0.1973,
"step": 2980
},
{
"epoch": 0.56,
"learning_rate": 7.973684210526316e-06,
"loss": 0.2103,
"step": 2990
},
{
"epoch": 0.56,
"learning_rate": 7.894736842105263e-06,
"loss": 0.2011,
"step": 3000
},
{
"epoch": 0.56,
"learning_rate": 7.81578947368421e-06,
"loss": 0.2085,
"step": 3010
},
{
"epoch": 0.57,
"learning_rate": 7.73684210526316e-06,
"loss": 0.2042,
"step": 3020
},
{
"epoch": 0.57,
"learning_rate": 7.657894736842106e-06,
"loss": 0.2123,
"step": 3030
},
{
"epoch": 0.57,
"learning_rate": 7.578947368421053e-06,
"loss": 0.1853,
"step": 3040
},
{
"epoch": 0.57,
"learning_rate": 7.5e-06,
"loss": 0.1943,
"step": 3050
},
{
"epoch": 0.57,
"learning_rate": 7.421052631578948e-06,
"loss": 0.2049,
"step": 3060
},
{
"epoch": 0.57,
"learning_rate": 7.342105263157895e-06,
"loss": 0.1986,
"step": 3070
},
{
"epoch": 0.58,
"learning_rate": 7.2631578947368426e-06,
"loss": 0.1912,
"step": 3080
},
{
"epoch": 0.58,
"learning_rate": 7.184210526315789e-06,
"loss": 0.2061,
"step": 3090
},
{
"epoch": 0.58,
"learning_rate": 7.105263157894737e-06,
"loss": 0.1968,
"step": 3100
},
{
"epoch": 0.58,
"learning_rate": 7.026315789473685e-06,
"loss": 0.1827,
"step": 3110
},
{
"epoch": 0.58,
"learning_rate": 6.9473684210526315e-06,
"loss": 0.2016,
"step": 3120
},
{
"epoch": 0.59,
"learning_rate": 6.868421052631579e-06,
"loss": 0.2212,
"step": 3130
},
{
"epoch": 0.59,
"learning_rate": 6.7894736842105264e-06,
"loss": 0.2108,
"step": 3140
},
{
"epoch": 0.59,
"learning_rate": 6.710526315789474e-06,
"loss": 0.2121,
"step": 3150
},
{
"epoch": 0.59,
"learning_rate": 6.631578947368421e-06,
"loss": 0.1888,
"step": 3160
},
{
"epoch": 0.59,
"learning_rate": 6.552631578947369e-06,
"loss": 0.2018,
"step": 3170
},
{
"epoch": 0.6,
"learning_rate": 6.473684210526316e-06,
"loss": 0.2049,
"step": 3180
},
{
"epoch": 0.6,
"learning_rate": 6.394736842105263e-06,
"loss": 0.2084,
"step": 3190
},
{
"epoch": 0.6,
"learning_rate": 6.31578947368421e-06,
"loss": 0.188,
"step": 3200
},
{
"epoch": 0.6,
"learning_rate": 6.236842105263159e-06,
"loss": 0.1863,
"step": 3210
},
{
"epoch": 0.6,
"learning_rate": 6.157894736842105e-06,
"loss": 0.2082,
"step": 3220
},
{
"epoch": 0.6,
"learning_rate": 6.078947368421053e-06,
"loss": 0.2047,
"step": 3230
},
{
"epoch": 0.61,
"learning_rate": 6e-06,
"loss": 0.208,
"step": 3240
},
{
"epoch": 0.61,
"learning_rate": 5.921052631578948e-06,
"loss": 0.2067,
"step": 3250
},
{
"epoch": 0.61,
"learning_rate": 5.842105263157895e-06,
"loss": 0.2032,
"step": 3260
},
{
"epoch": 0.61,
"learning_rate": 5.763157894736842e-06,
"loss": 0.1891,
"step": 3270
},
{
"epoch": 0.61,
"learning_rate": 5.68421052631579e-06,
"loss": 0.176,
"step": 3280
},
{
"epoch": 0.62,
"learning_rate": 5.605263157894737e-06,
"loss": 0.2063,
"step": 3290
},
{
"epoch": 0.62,
"learning_rate": 5.526315789473684e-06,
"loss": 0.2006,
"step": 3300
},
{
"epoch": 0.62,
"learning_rate": 5.447368421052632e-06,
"loss": 0.2016,
"step": 3310
},
{
"epoch": 0.62,
"learning_rate": 5.368421052631579e-06,
"loss": 0.1875,
"step": 3320
},
{
"epoch": 0.62,
"learning_rate": 5.289473684210526e-06,
"loss": 0.1951,
"step": 3330
},
{
"epoch": 0.63,
"learning_rate": 5.210526315789474e-06,
"loss": 0.2017,
"step": 3340
},
{
"epoch": 0.63,
"learning_rate": 5.131578947368421e-06,
"loss": 0.2072,
"step": 3350
},
{
"epoch": 0.63,
"learning_rate": 5.052631578947369e-06,
"loss": 0.1919,
"step": 3360
},
{
"epoch": 0.63,
"learning_rate": 4.973684210526315e-06,
"loss": 0.2072,
"step": 3370
},
{
"epoch": 0.63,
"learning_rate": 4.894736842105264e-06,
"loss": 0.1955,
"step": 3380
},
{
"epoch": 0.63,
"learning_rate": 4.81578947368421e-06,
"loss": 0.2037,
"step": 3390
},
{
"epoch": 0.64,
"learning_rate": 4.736842105263158e-06,
"loss": 0.1891,
"step": 3400
},
{
"epoch": 0.64,
"learning_rate": 4.657894736842106e-06,
"loss": 0.1814,
"step": 3410
},
{
"epoch": 0.64,
"learning_rate": 4.578947368421053e-06,
"loss": 0.1737,
"step": 3420
},
{
"epoch": 0.64,
"learning_rate": 4.5e-06,
"loss": 0.2123,
"step": 3430
},
{
"epoch": 0.64,
"learning_rate": 4.421052631578947e-06,
"loss": 0.1984,
"step": 3440
},
{
"epoch": 0.65,
"learning_rate": 4.342105263157895e-06,
"loss": 0.1857,
"step": 3450
},
{
"epoch": 0.65,
"learning_rate": 4.2631578947368425e-06,
"loss": 0.1855,
"step": 3460
},
{
"epoch": 0.65,
"learning_rate": 4.184210526315789e-06,
"loss": 0.1993,
"step": 3470
},
{
"epoch": 0.65,
"learning_rate": 4.105263157894737e-06,
"loss": 0.1951,
"step": 3480
},
{
"epoch": 0.65,
"learning_rate": 4.026315789473684e-06,
"loss": 0.1976,
"step": 3490
},
{
"epoch": 0.66,
"learning_rate": 3.9473684210526315e-06,
"loss": 0.1848,
"step": 3500
},
{
"epoch": 0.66,
"learning_rate": 3.86842105263158e-06,
"loss": 0.2031,
"step": 3510
},
{
"epoch": 0.66,
"learning_rate": 3.7894736842105264e-06,
"loss": 0.1773,
"step": 3520
},
{
"epoch": 0.66,
"learning_rate": 3.710526315789474e-06,
"loss": 0.1955,
"step": 3530
},
{
"epoch": 0.66,
"learning_rate": 3.6315789473684213e-06,
"loss": 0.1836,
"step": 3540
},
{
"epoch": 0.66,
"learning_rate": 3.5526315789473683e-06,
"loss": 0.199,
"step": 3550
},
{
"epoch": 0.67,
"learning_rate": 3.4736842105263158e-06,
"loss": 0.1827,
"step": 3560
},
{
"epoch": 0.67,
"learning_rate": 3.3947368421052632e-06,
"loss": 0.1917,
"step": 3570
},
{
"epoch": 0.67,
"learning_rate": 3.3157894736842107e-06,
"loss": 0.1849,
"step": 3580
},
{
"epoch": 0.67,
"learning_rate": 3.236842105263158e-06,
"loss": 0.1913,
"step": 3590
},
{
"epoch": 0.67,
"learning_rate": 3.157894736842105e-06,
"loss": 0.1822,
"step": 3600
},
{
"epoch": 0.68,
"learning_rate": 3.0789473684210526e-06,
"loss": 0.2005,
"step": 3610
},
{
"epoch": 0.68,
"learning_rate": 3e-06,
"loss": 0.1969,
"step": 3620
},
{
"epoch": 0.68,
"learning_rate": 2.9210526315789475e-06,
"loss": 0.184,
"step": 3630
},
{
"epoch": 0.68,
"learning_rate": 2.842105263157895e-06,
"loss": 0.1829,
"step": 3640
},
{
"epoch": 0.68,
"learning_rate": 2.763157894736842e-06,
"loss": 0.1856,
"step": 3650
},
{
"epoch": 0.69,
"learning_rate": 2.6842105263157895e-06,
"loss": 0.18,
"step": 3660
},
{
"epoch": 0.69,
"learning_rate": 2.605263157894737e-06,
"loss": 0.1794,
"step": 3670
},
{
"epoch": 0.69,
"learning_rate": 2.5263157894736844e-06,
"loss": 0.1892,
"step": 3680
},
{
"epoch": 0.69,
"learning_rate": 2.447368421052632e-06,
"loss": 0.1953,
"step": 3690
},
{
"epoch": 0.69,
"learning_rate": 2.368421052631579e-06,
"loss": 0.1996,
"step": 3700
},
{
"epoch": 0.69,
"learning_rate": 2.2894736842105263e-06,
"loss": 0.1781,
"step": 3710
},
{
"epoch": 0.7,
"learning_rate": 2.2105263157894734e-06,
"loss": 0.1884,
"step": 3720
},
{
"epoch": 0.7,
"learning_rate": 2.1315789473684212e-06,
"loss": 0.1829,
"step": 3730
},
{
"epoch": 0.7,
"learning_rate": 2.0526315789473687e-06,
"loss": 0.1853,
"step": 3740
},
{
"epoch": 0.7,
"learning_rate": 1.9736842105263157e-06,
"loss": 0.1999,
"step": 3750
},
{
"epoch": 0.7,
"learning_rate": 1.8947368421052632e-06,
"loss": 0.181,
"step": 3760
},
{
"epoch": 0.71,
"learning_rate": 1.8157894736842106e-06,
"loss": 0.1769,
"step": 3770
},
{
"epoch": 0.71,
"learning_rate": 1.7368421052631579e-06,
"loss": 0.1965,
"step": 3780
},
{
"epoch": 0.71,
"learning_rate": 1.6578947368421053e-06,
"loss": 0.1866,
"step": 3790
},
{
"epoch": 0.71,
"learning_rate": 1.5789473684210526e-06,
"loss": 0.1848,
"step": 3800
},
{
"epoch": 0.71,
"learning_rate": 1.5e-06,
"loss": 0.204,
"step": 3810
},
{
"epoch": 0.71,
"learning_rate": 1.4210526315789475e-06,
"loss": 0.1807,
"step": 3820
},
{
"epoch": 0.72,
"learning_rate": 1.3421052631578947e-06,
"loss": 0.1858,
"step": 3830
},
{
"epoch": 0.72,
"learning_rate": 1.2631578947368422e-06,
"loss": 0.1953,
"step": 3840
},
{
"epoch": 0.72,
"learning_rate": 1.1842105263157894e-06,
"loss": 0.1793,
"step": 3850
},
{
"epoch": 0.72,
"learning_rate": 1.1052631578947367e-06,
"loss": 0.1987,
"step": 3860
},
{
"epoch": 0.72,
"learning_rate": 1.0263157894736843e-06,
"loss": 0.1857,
"step": 3870
},
{
"epoch": 0.73,
"learning_rate": 9.473684210526316e-07,
"loss": 0.1767,
"step": 3880
},
{
"epoch": 0.73,
"learning_rate": 8.684210526315789e-07,
"loss": 0.1784,
"step": 3890
},
{
"epoch": 0.73,
"learning_rate": 7.894736842105263e-07,
"loss": 0.1657,
"step": 3900
},
{
"epoch": 0.73,
"learning_rate": 7.105263157894737e-07,
"loss": 0.1801,
"step": 3910
},
{
"epoch": 0.73,
"learning_rate": 6.315789473684211e-07,
"loss": 0.1853,
"step": 3920
},
{
"epoch": 0.74,
"learning_rate": 5.526315789473683e-07,
"loss": 0.1843,
"step": 3930
},
{
"epoch": 0.74,
"learning_rate": 4.736842105263158e-07,
"loss": 0.2039,
"step": 3940
},
{
"epoch": 0.74,
"learning_rate": 3.9473684210526315e-07,
"loss": 0.2031,
"step": 3950
},
{
"epoch": 0.74,
"learning_rate": 3.1578947368421055e-07,
"loss": 0.1851,
"step": 3960
},
{
"epoch": 0.74,
"learning_rate": 2.368421052631579e-07,
"loss": 0.1765,
"step": 3970
},
{
"epoch": 0.74,
"learning_rate": 1.5789473684210527e-07,
"loss": 0.1724,
"step": 3980
},
{
"epoch": 0.75,
"learning_rate": 7.894736842105264e-08,
"loss": 0.1824,
"step": 3990
},
{
"epoch": 0.75,
"learning_rate": 0.0,
"loss": 0.1716,
"step": 4000
}
],
"max_steps": 4000,
"num_train_epochs": 1,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}