|
{ |
|
"best_metric": 0.7822743590634694, |
|
"best_model_checkpoint": "./checkpoints/clip-stage3pa-1024/checkpoint-3810", |
|
"epoch": 0.7486430844095078, |
|
"global_step": 4000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.727, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3e-06, |
|
"loss": 0.6697, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.6164, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6e-06, |
|
"loss": 0.5883, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.5711, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9e-06, |
|
"loss": 0.5863, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.05e-05, |
|
"loss": 0.5441, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.5303, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.3500000000000001e-05, |
|
"loss": 0.5436, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.5507, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.65e-05, |
|
"loss": 0.5534, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.5384, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.95e-05, |
|
"loss": 0.5533, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.5475, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.5482, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.5099, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.55e-05, |
|
"loss": 0.539, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.5176, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.8499999999999998e-05, |
|
"loss": 0.526, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5276, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.992105263157895e-05, |
|
"loss": 0.5077, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9842105263157894e-05, |
|
"loss": 0.5375, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9763157894736842e-05, |
|
"loss": 0.5411, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.968421052631579e-05, |
|
"loss": 0.4877, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9605263157894735e-05, |
|
"loss": 0.5347, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9526315789473684e-05, |
|
"loss": 0.4839, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9447368421052635e-05, |
|
"loss": 0.4936, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.936842105263158e-05, |
|
"loss": 0.4849, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.928947368421053e-05, |
|
"loss": 0.4696, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9210526315789474e-05, |
|
"loss": 0.4872, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9131578947368422e-05, |
|
"loss": 0.4636, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.905263157894737e-05, |
|
"loss": 0.4728, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8973684210526315e-05, |
|
"loss": 0.4743, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8894736842105263e-05, |
|
"loss": 0.4774, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.8815789473684212e-05, |
|
"loss": 0.4372, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.8736842105263157e-05, |
|
"loss": 0.4515, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.8657894736842105e-05, |
|
"loss": 0.422, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.8578947368421053e-05, |
|
"loss": 0.4505, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.8499999999999998e-05, |
|
"loss": 0.4383, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.8421052631578946e-05, |
|
"loss": 0.4601, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.8342105263157898e-05, |
|
"loss": 0.4327, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.8263157894736843e-05, |
|
"loss": 0.4182, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.818421052631579e-05, |
|
"loss": 0.4421, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.810526315789474e-05, |
|
"loss": 0.427, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.8026315789473685e-05, |
|
"loss": 0.4397, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.7947368421052633e-05, |
|
"loss": 0.4249, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.786842105263158e-05, |
|
"loss": 0.432, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.7789473684210526e-05, |
|
"loss": 0.4264, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.7710526315789474e-05, |
|
"loss": 0.4325, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.7631578947368423e-05, |
|
"loss": 0.4204, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.7552631578947368e-05, |
|
"loss": 0.4246, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.7473684210526316e-05, |
|
"loss": 0.429, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.739473684210526e-05, |
|
"loss": 0.4003, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.7315789473684213e-05, |
|
"loss": 0.3946, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.723684210526316e-05, |
|
"loss": 0.4035, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.7157894736842106e-05, |
|
"loss": 0.384, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.7078947368421054e-05, |
|
"loss": 0.4095, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.386, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.6921052631578947e-05, |
|
"loss": 0.374, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.6842105263157896e-05, |
|
"loss": 0.4155, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.6763157894736844e-05, |
|
"loss": 0.403, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.668421052631579e-05, |
|
"loss": 0.3722, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.6605263157894737e-05, |
|
"loss": 0.39, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.6526315789473685e-05, |
|
"loss": 0.4041, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.644736842105263e-05, |
|
"loss": 0.4011, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.636842105263158e-05, |
|
"loss": 0.3887, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.6289473684210527e-05, |
|
"loss": 0.3807, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.6210526315789475e-05, |
|
"loss": 0.3762, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.6131578947368424e-05, |
|
"loss": 0.3831, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.605263157894737e-05, |
|
"loss": 0.3408, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.5973684210526317e-05, |
|
"loss": 0.362, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.5894736842105265e-05, |
|
"loss": 0.3606, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.581578947368421e-05, |
|
"loss": 0.3475, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.5736842105263158e-05, |
|
"loss": 0.3659, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.5657894736842107e-05, |
|
"loss": 0.3391, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.557894736842105e-05, |
|
"loss": 0.3744, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.55e-05, |
|
"loss": 0.3609, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5421052631578948e-05, |
|
"loss": 0.3525, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5342105263157893e-05, |
|
"loss": 0.3527, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.526315789473684e-05, |
|
"loss": 0.3566, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.518421052631579e-05, |
|
"loss": 0.3327, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5105263157894738e-05, |
|
"loss": 0.3789, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.5026315789473686e-05, |
|
"loss": 0.3415, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.4947368421052635e-05, |
|
"loss": 0.3342, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.486842105263158e-05, |
|
"loss": 0.3415, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.4789473684210528e-05, |
|
"loss": 0.3562, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.4710526315789476e-05, |
|
"loss": 0.3405, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.463157894736842e-05, |
|
"loss": 0.368, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.455263157894737e-05, |
|
"loss": 0.3547, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.4473684210526318e-05, |
|
"loss": 0.3508, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.4394736842105262e-05, |
|
"loss": 0.34, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.431578947368421e-05, |
|
"loss": 0.3256, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.4236842105263156e-05, |
|
"loss": 0.3515, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.4157894736842104e-05, |
|
"loss": 0.316, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.4078947368421056e-05, |
|
"loss": 0.3382, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.3314, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.392105263157895e-05, |
|
"loss": 0.3285, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.3842105263157897e-05, |
|
"loss": 0.3472, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.3763157894736842e-05, |
|
"loss": 0.3183, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.368421052631579e-05, |
|
"loss": 0.332, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.360526315789474e-05, |
|
"loss": 0.314, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.3526315789473684e-05, |
|
"loss": 0.3127, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.3447368421052632e-05, |
|
"loss": 0.3483, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.336842105263158e-05, |
|
"loss": 0.3408, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.3289473684210525e-05, |
|
"loss": 0.3406, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.3210526315789473e-05, |
|
"loss": 0.3205, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.3131578947368422e-05, |
|
"loss": 0.3135, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.3052631578947367e-05, |
|
"loss": 0.3107, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.297368421052632e-05, |
|
"loss": 0.3079, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.2894736842105263e-05, |
|
"loss": 0.3141, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.281578947368421e-05, |
|
"loss": 0.3186, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.273684210526316e-05, |
|
"loss": 0.3254, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.2657894736842105e-05, |
|
"loss": 0.3219, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.2578947368421053e-05, |
|
"loss": 0.3106, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.3042, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.2421052631578946e-05, |
|
"loss": 0.3154, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.2342105263157895e-05, |
|
"loss": 0.3147, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.2263157894736843e-05, |
|
"loss": 0.2916, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.2184210526315788e-05, |
|
"loss": 0.3159, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.2105263157894736e-05, |
|
"loss": 0.3084, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.2026315789473684e-05, |
|
"loss": 0.3071, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.1947368421052633e-05, |
|
"loss": 0.312, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.186842105263158e-05, |
|
"loss": 0.3018, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.178947368421053e-05, |
|
"loss": 0.3195, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.1710526315789474e-05, |
|
"loss": 0.3115, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.1631578947368423e-05, |
|
"loss": 0.2827, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.155263157894737e-05, |
|
"loss": 0.3168, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.1473684210526316e-05, |
|
"loss": 0.3004, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.1394736842105264e-05, |
|
"loss": 0.3084, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.1315789473684212e-05, |
|
"loss": 0.3065, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.1236842105263157e-05, |
|
"loss": 0.318, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.1157894736842106e-05, |
|
"loss": 0.3035, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.107894736842105e-05, |
|
"loss": 0.2849, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.2861, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.0921052631578947e-05, |
|
"loss": 0.2852, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.0842105263157895e-05, |
|
"loss": 0.3204, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.0763157894736844e-05, |
|
"loss": 0.31, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.0684210526315792e-05, |
|
"loss": 0.2854, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.0605263157894737e-05, |
|
"loss": 0.2855, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.0526315789473685e-05, |
|
"loss": 0.3003, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.0447368421052634e-05, |
|
"loss": 0.2889, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.036842105263158e-05, |
|
"loss": 0.2834, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.0289473684210527e-05, |
|
"loss": 0.2696, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.0210526315789475e-05, |
|
"loss": 0.289, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.013157894736842e-05, |
|
"loss": 0.2851, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.0052631578947368e-05, |
|
"loss": 0.2903, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.9973684210526317e-05, |
|
"loss": 0.2742, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.989473684210526e-05, |
|
"loss": 0.2775, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.9815789473684213e-05, |
|
"loss": 0.2783, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.9736842105263158e-05, |
|
"loss": 0.2814, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.9657894736842106e-05, |
|
"loss": 0.2931, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.9578947368421055e-05, |
|
"loss": 0.2811, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.95e-05, |
|
"loss": 0.29, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.9421052631578948e-05, |
|
"loss": 0.2925, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.9342105263157896e-05, |
|
"loss": 0.2658, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.926315789473684e-05, |
|
"loss": 0.2959, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.918421052631579e-05, |
|
"loss": 0.2695, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.9105263157894738e-05, |
|
"loss": 0.2902, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.9026315789473683e-05, |
|
"loss": 0.2597, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.894736842105263e-05, |
|
"loss": 0.2745, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.886842105263158e-05, |
|
"loss": 0.2775, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.8789473684210524e-05, |
|
"loss": 0.2707, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.8710526315789476e-05, |
|
"loss": 0.2699, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.8631578947368424e-05, |
|
"loss": 0.2762, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.855263157894737e-05, |
|
"loss": 0.2539, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.8473684210526317e-05, |
|
"loss": 0.273, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.8394736842105266e-05, |
|
"loss": 0.244, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.831578947368421e-05, |
|
"loss": 0.2632, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.823684210526316e-05, |
|
"loss": 0.2756, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.8157894736842107e-05, |
|
"loss": 0.2555, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.8078947368421052e-05, |
|
"loss": 0.2863, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.2516, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.7921052631578945e-05, |
|
"loss": 0.2467, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.7842105263157894e-05, |
|
"loss": 0.2591, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.7763157894736842e-05, |
|
"loss": 0.2641, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.7684210526315787e-05, |
|
"loss": 0.2658, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.760526315789474e-05, |
|
"loss": 0.252, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.7526315789473687e-05, |
|
"loss": 0.2654, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.7447368421052632e-05, |
|
"loss": 0.2723, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.736842105263158e-05, |
|
"loss": 0.2716, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.728947368421053e-05, |
|
"loss": 0.2741, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.7210526315789473e-05, |
|
"loss": 0.2708, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.713157894736842e-05, |
|
"loss": 0.2484, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.705263157894737e-05, |
|
"loss": 0.2615, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.6973684210526315e-05, |
|
"loss": 0.2547, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.6894736842105263e-05, |
|
"loss": 0.264, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.681578947368421e-05, |
|
"loss": 0.2514, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.6736842105263156e-05, |
|
"loss": 0.2385, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.6657894736842105e-05, |
|
"loss": 0.2539, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.6578947368421053e-05, |
|
"loss": 0.24, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.65e-05, |
|
"loss": 0.2509, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.642105263157895e-05, |
|
"loss": 0.2573, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.6342105263157894e-05, |
|
"loss": 0.2427, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.6263157894736843e-05, |
|
"loss": 0.2572, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.618421052631579e-05, |
|
"loss": 0.2522, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.6105263157894736e-05, |
|
"loss": 0.2553, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.6026315789473684e-05, |
|
"loss": 0.2436, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.5947368421052633e-05, |
|
"loss": 0.2572, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.5868421052631578e-05, |
|
"loss": 0.2541, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.5789473684210526e-05, |
|
"loss": 0.2417, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.5710526315789474e-05, |
|
"loss": 0.2586, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.563157894736842e-05, |
|
"loss": 0.2383, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.5552631578947367e-05, |
|
"loss": 0.2497, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.547368421052632e-05, |
|
"loss": 0.2357, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.5394736842105264e-05, |
|
"loss": 0.26, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.5315789473684212e-05, |
|
"loss": 0.2442, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.5236842105263159e-05, |
|
"loss": 0.2269, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.5157894736842105e-05, |
|
"loss": 0.2419, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.5078947368421054e-05, |
|
"loss": 0.2442, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2331, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.4921052631578947e-05, |
|
"loss": 0.2311, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.4842105263157895e-05, |
|
"loss": 0.2444, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.4763157894736842e-05, |
|
"loss": 0.2409, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.468421052631579e-05, |
|
"loss": 0.2365, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.4605263157894737e-05, |
|
"loss": 0.2517, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.4526315789473685e-05, |
|
"loss": 0.2562, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.4447368421052632e-05, |
|
"loss": 0.2233, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.4368421052631578e-05, |
|
"loss": 0.2496, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.4289473684210527e-05, |
|
"loss": 0.2452, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.4210526315789473e-05, |
|
"loss": 0.2374, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.4131578947368422e-05, |
|
"loss": 0.2233, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.405263157894737e-05, |
|
"loss": 0.2571, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.3973684210526316e-05, |
|
"loss": 0.2254, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.3894736842105263e-05, |
|
"loss": 0.2236, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.3815789473684211e-05, |
|
"loss": 0.2284, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.3736842105263158e-05, |
|
"loss": 0.232, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.3657894736842106e-05, |
|
"loss": 0.2415, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.3578947368421053e-05, |
|
"loss": 0.2244, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.3500000000000001e-05, |
|
"loss": 0.2338, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.3421052631578948e-05, |
|
"loss": 0.2382, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.3342105263157894e-05, |
|
"loss": 0.2349, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.3263157894736843e-05, |
|
"loss": 0.2267, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.318421052631579e-05, |
|
"loss": 0.2426, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.3105263157894738e-05, |
|
"loss": 0.238, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.3026315789473684e-05, |
|
"loss": 0.2332, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.2947368421052633e-05, |
|
"loss": 0.2332, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.2868421052631579e-05, |
|
"loss": 0.2477, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.2789473684210526e-05, |
|
"loss": 0.2176, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.2710526315789474e-05, |
|
"loss": 0.2177, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.263157894736842e-05, |
|
"loss": 0.2278, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.2552631578947369e-05, |
|
"loss": 0.2221, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.2473684210526317e-05, |
|
"loss": 0.2173, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.2394736842105264e-05, |
|
"loss": 0.2131, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.231578947368421e-05, |
|
"loss": 0.2346, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.2236842105263159e-05, |
|
"loss": 0.2301, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.2157894736842105e-05, |
|
"loss": 0.2173, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.2078947368421052e-05, |
|
"loss": 0.2297, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.2255, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.1921052631578949e-05, |
|
"loss": 0.2307, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.1842105263157895e-05, |
|
"loss": 0.222, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.1763157894736842e-05, |
|
"loss": 0.2319, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.168421052631579e-05, |
|
"loss": 0.228, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.1605263157894737e-05, |
|
"loss": 0.2267, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1526315789473683e-05, |
|
"loss": 0.2163, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1447368421052632e-05, |
|
"loss": 0.2167, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.136842105263158e-05, |
|
"loss": 0.1991, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1289473684210527e-05, |
|
"loss": 0.2165, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1210526315789473e-05, |
|
"loss": 0.2147, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1131578947368421e-05, |
|
"loss": 0.2203, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.1052631578947368e-05, |
|
"loss": 0.2253, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0973684210526316e-05, |
|
"loss": 0.2326, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0894736842105265e-05, |
|
"loss": 0.221, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0815789473684211e-05, |
|
"loss": 0.2233, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0736842105263158e-05, |
|
"loss": 0.2199, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0657894736842106e-05, |
|
"loss": 0.2077, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0578947368421053e-05, |
|
"loss": 0.2108, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.05e-05, |
|
"loss": 0.2217, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0421052631578948e-05, |
|
"loss": 0.2067, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0342105263157896e-05, |
|
"loss": 0.2351, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.0263157894736843e-05, |
|
"loss": 0.1952, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.018421052631579e-05, |
|
"loss": 0.2259, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.0105263157894738e-05, |
|
"loss": 0.2074, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.0026315789473684e-05, |
|
"loss": 0.1963, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.94736842105263e-06, |
|
"loss": 0.2193, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.868421052631579e-06, |
|
"loss": 0.2172, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.789473684210527e-06, |
|
"loss": 0.2278, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.710526315789474e-06, |
|
"loss": 0.1933, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.63157894736842e-06, |
|
"loss": 0.2177, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.552631578947369e-06, |
|
"loss": 0.204, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.473684210526315e-06, |
|
"loss": 0.2178, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.394736842105262e-06, |
|
"loss": 0.2012, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.315789473684212e-06, |
|
"loss": 0.2054, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.236842105263159e-06, |
|
"loss": 0.2067, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.157894736842105e-06, |
|
"loss": 0.2132, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.078947368421054e-06, |
|
"loss": 0.2081, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9e-06, |
|
"loss": 0.2033, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.921052631578947e-06, |
|
"loss": 0.2158, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.842105263157893e-06, |
|
"loss": 0.2043, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.763157894736843e-06, |
|
"loss": 0.1979, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.68421052631579e-06, |
|
"loss": 0.213, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.605263157894737e-06, |
|
"loss": 0.2042, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.526315789473685e-06, |
|
"loss": 0.2067, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.447368421052632e-06, |
|
"loss": 0.2052, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.368421052631578e-06, |
|
"loss": 0.2036, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.289473684210526e-06, |
|
"loss": 0.2224, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.210526315789475e-06, |
|
"loss": 0.2106, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.131578947368421e-06, |
|
"loss": 0.1956, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.052631578947368e-06, |
|
"loss": 0.1973, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 7.973684210526316e-06, |
|
"loss": 0.2103, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 7.894736842105263e-06, |
|
"loss": 0.2011, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 7.81578947368421e-06, |
|
"loss": 0.2085, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 7.73684210526316e-06, |
|
"loss": 0.2042, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 7.657894736842106e-06, |
|
"loss": 0.2123, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 7.578947368421053e-06, |
|
"loss": 0.1853, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1943, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 7.421052631578948e-06, |
|
"loss": 0.2049, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 7.342105263157895e-06, |
|
"loss": 0.1986, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.2631578947368426e-06, |
|
"loss": 0.1912, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.184210526315789e-06, |
|
"loss": 0.2061, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.105263157894737e-06, |
|
"loss": 0.1968, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.026315789473685e-06, |
|
"loss": 0.1827, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 6.9473684210526315e-06, |
|
"loss": 0.2016, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 6.868421052631579e-06, |
|
"loss": 0.2212, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 6.7894736842105264e-06, |
|
"loss": 0.2108, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 6.710526315789474e-06, |
|
"loss": 0.2121, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 6.631578947368421e-06, |
|
"loss": 0.1888, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 6.552631578947369e-06, |
|
"loss": 0.2018, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 6.473684210526316e-06, |
|
"loss": 0.2049, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 6.394736842105263e-06, |
|
"loss": 0.2084, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 6.31578947368421e-06, |
|
"loss": 0.188, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 6.236842105263159e-06, |
|
"loss": 0.1863, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 6.157894736842105e-06, |
|
"loss": 0.2082, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 6.078947368421053e-06, |
|
"loss": 0.2047, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 6e-06, |
|
"loss": 0.208, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5.921052631578948e-06, |
|
"loss": 0.2067, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5.842105263157895e-06, |
|
"loss": 0.2032, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5.763157894736842e-06, |
|
"loss": 0.1891, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5.68421052631579e-06, |
|
"loss": 0.176, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 5.605263157894737e-06, |
|
"loss": 0.2063, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 5.526315789473684e-06, |
|
"loss": 0.2006, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 5.447368421052632e-06, |
|
"loss": 0.2016, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 5.368421052631579e-06, |
|
"loss": 0.1875, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 5.289473684210526e-06, |
|
"loss": 0.1951, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 5.210526315789474e-06, |
|
"loss": 0.2017, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 5.131578947368421e-06, |
|
"loss": 0.2072, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 5.052631578947369e-06, |
|
"loss": 0.1919, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.973684210526315e-06, |
|
"loss": 0.2072, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.894736842105264e-06, |
|
"loss": 0.1955, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.81578947368421e-06, |
|
"loss": 0.2037, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.736842105263158e-06, |
|
"loss": 0.1891, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.657894736842106e-06, |
|
"loss": 0.1814, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.578947368421053e-06, |
|
"loss": 0.1737, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.2123, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.421052631578947e-06, |
|
"loss": 0.1984, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.342105263157895e-06, |
|
"loss": 0.1857, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.2631578947368425e-06, |
|
"loss": 0.1855, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.184210526315789e-06, |
|
"loss": 0.1993, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.105263157894737e-06, |
|
"loss": 0.1951, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.026315789473684e-06, |
|
"loss": 0.1976, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.9473684210526315e-06, |
|
"loss": 0.1848, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.86842105263158e-06, |
|
"loss": 0.2031, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.7894736842105264e-06, |
|
"loss": 0.1773, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.710526315789474e-06, |
|
"loss": 0.1955, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.6315789473684213e-06, |
|
"loss": 0.1836, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.5526315789473683e-06, |
|
"loss": 0.199, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.4736842105263158e-06, |
|
"loss": 0.1827, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.3947368421052632e-06, |
|
"loss": 0.1917, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.3157894736842107e-06, |
|
"loss": 0.1849, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.236842105263158e-06, |
|
"loss": 0.1913, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.157894736842105e-06, |
|
"loss": 0.1822, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.0789473684210526e-06, |
|
"loss": 0.2005, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3e-06, |
|
"loss": 0.1969, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.9210526315789475e-06, |
|
"loss": 0.184, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.842105263157895e-06, |
|
"loss": 0.1829, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.763157894736842e-06, |
|
"loss": 0.1856, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.6842105263157895e-06, |
|
"loss": 0.18, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.605263157894737e-06, |
|
"loss": 0.1794, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.5263157894736844e-06, |
|
"loss": 0.1892, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.447368421052632e-06, |
|
"loss": 0.1953, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.368421052631579e-06, |
|
"loss": 0.1996, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.2894736842105263e-06, |
|
"loss": 0.1781, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.2105263157894734e-06, |
|
"loss": 0.1884, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.1315789473684212e-06, |
|
"loss": 0.1829, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.0526315789473687e-06, |
|
"loss": 0.1853, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.9736842105263157e-06, |
|
"loss": 0.1999, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.8947368421052632e-06, |
|
"loss": 0.181, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.8157894736842106e-06, |
|
"loss": 0.1769, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.7368421052631579e-06, |
|
"loss": 0.1965, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6578947368421053e-06, |
|
"loss": 0.1866, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5789473684210526e-06, |
|
"loss": 0.1848, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.204, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4210526315789475e-06, |
|
"loss": 0.1807, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.3421052631578947e-06, |
|
"loss": 0.1858, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.2631578947368422e-06, |
|
"loss": 0.1953, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1842105263157894e-06, |
|
"loss": 0.1793, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1052631578947367e-06, |
|
"loss": 0.1987, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0263157894736843e-06, |
|
"loss": 0.1857, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.473684210526316e-07, |
|
"loss": 0.1767, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.684210526315789e-07, |
|
"loss": 0.1784, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.894736842105263e-07, |
|
"loss": 0.1657, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.105263157894737e-07, |
|
"loss": 0.1801, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 6.315789473684211e-07, |
|
"loss": 0.1853, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.526315789473683e-07, |
|
"loss": 0.1843, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.736842105263158e-07, |
|
"loss": 0.2039, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.9473684210526315e-07, |
|
"loss": 0.2031, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.1578947368421055e-07, |
|
"loss": 0.1851, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.368421052631579e-07, |
|
"loss": 0.1765, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.5789473684210527e-07, |
|
"loss": 0.1724, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.894736842105264e-08, |
|
"loss": 0.1824, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0, |
|
"loss": 0.1716, |
|
"step": 4000 |
|
} |
|
], |
|
"max_steps": 4000, |
|
"num_train_epochs": 1, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|