{ "best_metric": 0.7822743590634694, "best_model_checkpoint": "./checkpoints/clip-stage3pa-1024/checkpoint-3810", "epoch": 0.7486430844095078, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.5e-06, "loss": 0.727, "step": 10 }, { "epoch": 0.0, "learning_rate": 3e-06, "loss": 0.6697, "step": 20 }, { "epoch": 0.01, "learning_rate": 4.5e-06, "loss": 0.6164, "step": 30 }, { "epoch": 0.01, "learning_rate": 6e-06, "loss": 0.5883, "step": 40 }, { "epoch": 0.01, "learning_rate": 7.5e-06, "loss": 0.5711, "step": 50 }, { "epoch": 0.01, "learning_rate": 9e-06, "loss": 0.5863, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.05e-05, "loss": 0.5441, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.2e-05, "loss": 0.5303, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.3500000000000001e-05, "loss": 0.5436, "step": 90 }, { "epoch": 0.02, "learning_rate": 1.5e-05, "loss": 0.5507, "step": 100 }, { "epoch": 0.02, "learning_rate": 1.65e-05, "loss": 0.5534, "step": 110 }, { "epoch": 0.02, "learning_rate": 1.8e-05, "loss": 0.5384, "step": 120 }, { "epoch": 0.02, "learning_rate": 1.95e-05, "loss": 0.5533, "step": 130 }, { "epoch": 0.03, "learning_rate": 2.1e-05, "loss": 0.5475, "step": 140 }, { "epoch": 0.03, "learning_rate": 2.25e-05, "loss": 0.5482, "step": 150 }, { "epoch": 0.03, "learning_rate": 2.4e-05, "loss": 0.5099, "step": 160 }, { "epoch": 0.03, "learning_rate": 2.55e-05, "loss": 0.539, "step": 170 }, { "epoch": 0.03, "learning_rate": 2.7000000000000002e-05, "loss": 0.5176, "step": 180 }, { "epoch": 0.04, "learning_rate": 2.8499999999999998e-05, "loss": 0.526, "step": 190 }, { "epoch": 0.04, "learning_rate": 3e-05, "loss": 0.5276, "step": 200 }, { "epoch": 0.04, "learning_rate": 2.992105263157895e-05, "loss": 0.5077, "step": 210 }, { "epoch": 0.04, "learning_rate": 2.9842105263157894e-05, "loss": 0.5375, "step": 220 }, { "epoch": 0.04, "learning_rate": 2.9763157894736842e-05, "loss": 0.5411, "step": 230 }, { "epoch": 0.04, "learning_rate": 2.968421052631579e-05, "loss": 0.4877, "step": 240 }, { "epoch": 0.05, "learning_rate": 2.9605263157894735e-05, "loss": 0.5347, "step": 250 }, { "epoch": 0.05, "learning_rate": 2.9526315789473684e-05, "loss": 0.4839, "step": 260 }, { "epoch": 0.05, "learning_rate": 2.9447368421052635e-05, "loss": 0.4936, "step": 270 }, { "epoch": 0.05, "learning_rate": 2.936842105263158e-05, "loss": 0.4849, "step": 280 }, { "epoch": 0.05, "learning_rate": 2.928947368421053e-05, "loss": 0.4696, "step": 290 }, { "epoch": 0.06, "learning_rate": 2.9210526315789474e-05, "loss": 0.4872, "step": 300 }, { "epoch": 0.06, "learning_rate": 2.9131578947368422e-05, "loss": 0.4636, "step": 310 }, { "epoch": 0.06, "learning_rate": 2.905263157894737e-05, "loss": 0.4728, "step": 320 }, { "epoch": 0.06, "learning_rate": 2.8973684210526315e-05, "loss": 0.4743, "step": 330 }, { "epoch": 0.06, "learning_rate": 2.8894736842105263e-05, "loss": 0.4774, "step": 340 }, { "epoch": 0.07, "learning_rate": 2.8815789473684212e-05, "loss": 0.4372, "step": 350 }, { "epoch": 0.07, "learning_rate": 2.8736842105263157e-05, "loss": 0.4515, "step": 360 }, { "epoch": 0.07, "learning_rate": 2.8657894736842105e-05, "loss": 0.422, "step": 370 }, { "epoch": 0.07, "learning_rate": 2.8578947368421053e-05, "loss": 0.4505, "step": 380 }, { "epoch": 0.07, "learning_rate": 2.8499999999999998e-05, "loss": 0.4383, "step": 390 }, { "epoch": 0.07, "learning_rate": 2.8421052631578946e-05, "loss": 0.4601, "step": 400 }, { "epoch": 0.08, "learning_rate": 2.8342105263157898e-05, "loss": 0.4327, "step": 410 }, { "epoch": 0.08, "learning_rate": 2.8263157894736843e-05, "loss": 0.4182, "step": 420 }, { "epoch": 0.08, "learning_rate": 2.818421052631579e-05, "loss": 0.4421, "step": 430 }, { "epoch": 0.08, "learning_rate": 2.810526315789474e-05, "loss": 0.427, "step": 440 }, { "epoch": 0.08, "learning_rate": 2.8026315789473685e-05, "loss": 0.4397, "step": 450 }, { "epoch": 0.09, "learning_rate": 2.7947368421052633e-05, "loss": 0.4249, "step": 460 }, { "epoch": 0.09, "learning_rate": 2.786842105263158e-05, "loss": 0.432, "step": 470 }, { "epoch": 0.09, "learning_rate": 2.7789473684210526e-05, "loss": 0.4264, "step": 480 }, { "epoch": 0.09, "learning_rate": 2.7710526315789474e-05, "loss": 0.4325, "step": 490 }, { "epoch": 0.09, "learning_rate": 2.7631578947368423e-05, "loss": 0.4204, "step": 500 }, { "epoch": 0.1, "learning_rate": 2.7552631578947368e-05, "loss": 0.4246, "step": 510 }, { "epoch": 0.1, "learning_rate": 2.7473684210526316e-05, "loss": 0.429, "step": 520 }, { "epoch": 0.1, "learning_rate": 2.739473684210526e-05, "loss": 0.4003, "step": 530 }, { "epoch": 0.1, "learning_rate": 2.7315789473684213e-05, "loss": 0.3946, "step": 540 }, { "epoch": 0.1, "learning_rate": 2.723684210526316e-05, "loss": 0.4035, "step": 550 }, { "epoch": 0.1, "learning_rate": 2.7157894736842106e-05, "loss": 0.384, "step": 560 }, { "epoch": 0.11, "learning_rate": 2.7078947368421054e-05, "loss": 0.4095, "step": 570 }, { "epoch": 0.11, "learning_rate": 2.7000000000000002e-05, "loss": 0.386, "step": 580 }, { "epoch": 0.11, "learning_rate": 2.6921052631578947e-05, "loss": 0.374, "step": 590 }, { "epoch": 0.11, "learning_rate": 2.6842105263157896e-05, "loss": 0.4155, "step": 600 }, { "epoch": 0.11, "learning_rate": 2.6763157894736844e-05, "loss": 0.403, "step": 610 }, { "epoch": 0.12, "learning_rate": 2.668421052631579e-05, "loss": 0.3722, "step": 620 }, { "epoch": 0.12, "learning_rate": 2.6605263157894737e-05, "loss": 0.39, "step": 630 }, { "epoch": 0.12, "learning_rate": 2.6526315789473685e-05, "loss": 0.4041, "step": 640 }, { "epoch": 0.12, "learning_rate": 2.644736842105263e-05, "loss": 0.4011, "step": 650 }, { "epoch": 0.12, "learning_rate": 2.636842105263158e-05, "loss": 0.3887, "step": 660 }, { "epoch": 0.13, "learning_rate": 2.6289473684210527e-05, "loss": 0.3807, "step": 670 }, { "epoch": 0.13, "learning_rate": 2.6210526315789475e-05, "loss": 0.3762, "step": 680 }, { "epoch": 0.13, "learning_rate": 2.6131578947368424e-05, "loss": 0.3831, "step": 690 }, { "epoch": 0.13, "learning_rate": 2.605263157894737e-05, "loss": 0.3408, "step": 700 }, { "epoch": 0.13, "learning_rate": 2.5973684210526317e-05, "loss": 0.362, "step": 710 }, { "epoch": 0.13, "learning_rate": 2.5894736842105265e-05, "loss": 0.3606, "step": 720 }, { "epoch": 0.14, "learning_rate": 2.581578947368421e-05, "loss": 0.3475, "step": 730 }, { "epoch": 0.14, "learning_rate": 2.5736842105263158e-05, "loss": 0.3659, "step": 740 }, { "epoch": 0.14, "learning_rate": 2.5657894736842107e-05, "loss": 0.3391, "step": 750 }, { "epoch": 0.14, "learning_rate": 2.557894736842105e-05, "loss": 0.3744, "step": 760 }, { "epoch": 0.14, "learning_rate": 2.55e-05, "loss": 0.3609, "step": 770 }, { "epoch": 0.15, "learning_rate": 2.5421052631578948e-05, "loss": 0.3525, "step": 780 }, { "epoch": 0.15, "learning_rate": 2.5342105263157893e-05, "loss": 0.3527, "step": 790 }, { "epoch": 0.15, "learning_rate": 2.526315789473684e-05, "loss": 0.3566, "step": 800 }, { "epoch": 0.15, "learning_rate": 2.518421052631579e-05, "loss": 0.3327, "step": 810 }, { "epoch": 0.15, "learning_rate": 2.5105263157894738e-05, "loss": 0.3789, "step": 820 }, { "epoch": 0.16, "learning_rate": 2.5026315789473686e-05, "loss": 0.3415, "step": 830 }, { "epoch": 0.16, "learning_rate": 2.4947368421052635e-05, "loss": 0.3342, "step": 840 }, { "epoch": 0.16, "learning_rate": 2.486842105263158e-05, "loss": 0.3415, "step": 850 }, { "epoch": 0.16, "learning_rate": 2.4789473684210528e-05, "loss": 0.3562, "step": 860 }, { "epoch": 0.16, "learning_rate": 2.4710526315789476e-05, "loss": 0.3405, "step": 870 }, { "epoch": 0.16, "learning_rate": 2.463157894736842e-05, "loss": 0.368, "step": 880 }, { "epoch": 0.17, "learning_rate": 2.455263157894737e-05, "loss": 0.3547, "step": 890 }, { "epoch": 0.17, "learning_rate": 2.4473684210526318e-05, "loss": 0.3508, "step": 900 }, { "epoch": 0.17, "learning_rate": 2.4394736842105262e-05, "loss": 0.34, "step": 910 }, { "epoch": 0.17, "learning_rate": 2.431578947368421e-05, "loss": 0.3256, "step": 920 }, { "epoch": 0.17, "learning_rate": 2.4236842105263156e-05, "loss": 0.3515, "step": 930 }, { "epoch": 0.18, "learning_rate": 2.4157894736842104e-05, "loss": 0.316, "step": 940 }, { "epoch": 0.18, "learning_rate": 2.4078947368421056e-05, "loss": 0.3382, "step": 950 }, { "epoch": 0.18, "learning_rate": 2.4e-05, "loss": 0.3314, "step": 960 }, { "epoch": 0.18, "learning_rate": 2.392105263157895e-05, "loss": 0.3285, "step": 970 }, { "epoch": 0.18, "learning_rate": 2.3842105263157897e-05, "loss": 0.3472, "step": 980 }, { "epoch": 0.19, "learning_rate": 2.3763157894736842e-05, "loss": 0.3183, "step": 990 }, { "epoch": 0.19, "learning_rate": 2.368421052631579e-05, "loss": 0.332, "step": 1000 }, { "epoch": 0.19, "learning_rate": 2.360526315789474e-05, "loss": 0.314, "step": 1010 }, { "epoch": 0.19, "learning_rate": 2.3526315789473684e-05, "loss": 0.3127, "step": 1020 }, { "epoch": 0.19, "learning_rate": 2.3447368421052632e-05, "loss": 0.3483, "step": 1030 }, { "epoch": 0.19, "learning_rate": 2.336842105263158e-05, "loss": 0.3408, "step": 1040 }, { "epoch": 0.2, "learning_rate": 2.3289473684210525e-05, "loss": 0.3406, "step": 1050 }, { "epoch": 0.2, "learning_rate": 2.3210526315789473e-05, "loss": 0.3205, "step": 1060 }, { "epoch": 0.2, "learning_rate": 2.3131578947368422e-05, "loss": 0.3135, "step": 1070 }, { "epoch": 0.2, "learning_rate": 2.3052631578947367e-05, "loss": 0.3107, "step": 1080 }, { "epoch": 0.2, "learning_rate": 2.297368421052632e-05, "loss": 0.3079, "step": 1090 }, { "epoch": 0.21, "learning_rate": 2.2894736842105263e-05, "loss": 0.3141, "step": 1100 }, { "epoch": 0.21, "learning_rate": 2.281578947368421e-05, "loss": 0.3186, "step": 1110 }, { "epoch": 0.21, "learning_rate": 2.273684210526316e-05, "loss": 0.3254, "step": 1120 }, { "epoch": 0.21, "learning_rate": 2.2657894736842105e-05, "loss": 0.3219, "step": 1130 }, { "epoch": 0.21, "learning_rate": 2.2578947368421053e-05, "loss": 0.3106, "step": 1140 }, { "epoch": 0.22, "learning_rate": 2.25e-05, "loss": 0.3042, "step": 1150 }, { "epoch": 0.22, "learning_rate": 2.2421052631578946e-05, "loss": 0.3154, "step": 1160 }, { "epoch": 0.22, "learning_rate": 2.2342105263157895e-05, "loss": 0.3147, "step": 1170 }, { "epoch": 0.22, "learning_rate": 2.2263157894736843e-05, "loss": 0.2916, "step": 1180 }, { "epoch": 0.22, "learning_rate": 2.2184210526315788e-05, "loss": 0.3159, "step": 1190 }, { "epoch": 0.22, "learning_rate": 2.2105263157894736e-05, "loss": 0.3084, "step": 1200 }, { "epoch": 0.23, "learning_rate": 2.2026315789473684e-05, "loss": 0.3071, "step": 1210 }, { "epoch": 0.23, "learning_rate": 2.1947368421052633e-05, "loss": 0.312, "step": 1220 }, { "epoch": 0.23, "learning_rate": 2.186842105263158e-05, "loss": 0.3018, "step": 1230 }, { "epoch": 0.23, "learning_rate": 2.178947368421053e-05, "loss": 0.3195, "step": 1240 }, { "epoch": 0.23, "learning_rate": 2.1710526315789474e-05, "loss": 0.3115, "step": 1250 }, { "epoch": 0.24, "learning_rate": 2.1631578947368423e-05, "loss": 0.2827, "step": 1260 }, { "epoch": 0.24, "learning_rate": 2.155263157894737e-05, "loss": 0.3168, "step": 1270 }, { "epoch": 0.24, "learning_rate": 2.1473684210526316e-05, "loss": 0.3004, "step": 1280 }, { "epoch": 0.24, "learning_rate": 2.1394736842105264e-05, "loss": 0.3084, "step": 1290 }, { "epoch": 0.24, "learning_rate": 2.1315789473684212e-05, "loss": 0.3065, "step": 1300 }, { "epoch": 0.25, "learning_rate": 2.1236842105263157e-05, "loss": 0.318, "step": 1310 }, { "epoch": 0.25, "learning_rate": 2.1157894736842106e-05, "loss": 0.3035, "step": 1320 }, { "epoch": 0.25, "learning_rate": 2.107894736842105e-05, "loss": 0.2849, "step": 1330 }, { "epoch": 0.25, "learning_rate": 2.1e-05, "loss": 0.2861, "step": 1340 }, { "epoch": 0.25, "learning_rate": 2.0921052631578947e-05, "loss": 0.2852, "step": 1350 }, { "epoch": 0.25, "learning_rate": 2.0842105263157895e-05, "loss": 0.3204, "step": 1360 }, { "epoch": 0.26, "learning_rate": 2.0763157894736844e-05, "loss": 0.31, "step": 1370 }, { "epoch": 0.26, "learning_rate": 2.0684210526315792e-05, "loss": 0.2854, "step": 1380 }, { "epoch": 0.26, "learning_rate": 2.0605263157894737e-05, "loss": 0.2855, "step": 1390 }, { "epoch": 0.26, "learning_rate": 2.0526315789473685e-05, "loss": 0.3003, "step": 1400 }, { "epoch": 0.26, "learning_rate": 2.0447368421052634e-05, "loss": 0.2889, "step": 1410 }, { "epoch": 0.27, "learning_rate": 2.036842105263158e-05, "loss": 0.2834, "step": 1420 }, { "epoch": 0.27, "learning_rate": 2.0289473684210527e-05, "loss": 0.2696, "step": 1430 }, { "epoch": 0.27, "learning_rate": 2.0210526315789475e-05, "loss": 0.289, "step": 1440 }, { "epoch": 0.27, "learning_rate": 2.013157894736842e-05, "loss": 0.2851, "step": 1450 }, { "epoch": 0.27, "learning_rate": 2.0052631578947368e-05, "loss": 0.2903, "step": 1460 }, { "epoch": 0.28, "learning_rate": 1.9973684210526317e-05, "loss": 0.2742, "step": 1470 }, { "epoch": 0.28, "learning_rate": 1.989473684210526e-05, "loss": 0.2775, "step": 1480 }, { "epoch": 0.28, "learning_rate": 1.9815789473684213e-05, "loss": 0.2783, "step": 1490 }, { "epoch": 0.28, "learning_rate": 1.9736842105263158e-05, "loss": 0.2814, "step": 1500 }, { "epoch": 0.28, "learning_rate": 1.9657894736842106e-05, "loss": 0.2931, "step": 1510 }, { "epoch": 0.28, "learning_rate": 1.9578947368421055e-05, "loss": 0.2811, "step": 1520 }, { "epoch": 0.29, "learning_rate": 1.95e-05, "loss": 0.29, "step": 1530 }, { "epoch": 0.29, "learning_rate": 1.9421052631578948e-05, "loss": 0.2925, "step": 1540 }, { "epoch": 0.29, "learning_rate": 1.9342105263157896e-05, "loss": 0.2658, "step": 1550 }, { "epoch": 0.29, "learning_rate": 1.926315789473684e-05, "loss": 0.2959, "step": 1560 }, { "epoch": 0.29, "learning_rate": 1.918421052631579e-05, "loss": 0.2695, "step": 1570 }, { "epoch": 0.3, "learning_rate": 1.9105263157894738e-05, "loss": 0.2902, "step": 1580 }, { "epoch": 0.3, "learning_rate": 1.9026315789473683e-05, "loss": 0.2597, "step": 1590 }, { "epoch": 0.3, "learning_rate": 1.894736842105263e-05, "loss": 0.2745, "step": 1600 }, { "epoch": 0.3, "learning_rate": 1.886842105263158e-05, "loss": 0.2775, "step": 1610 }, { "epoch": 0.3, "learning_rate": 1.8789473684210524e-05, "loss": 0.2707, "step": 1620 }, { "epoch": 0.31, "learning_rate": 1.8710526315789476e-05, "loss": 0.2699, "step": 1630 }, { "epoch": 0.31, "learning_rate": 1.8631578947368424e-05, "loss": 0.2762, "step": 1640 }, { "epoch": 0.31, "learning_rate": 1.855263157894737e-05, "loss": 0.2539, "step": 1650 }, { "epoch": 0.31, "learning_rate": 1.8473684210526317e-05, "loss": 0.273, "step": 1660 }, { "epoch": 0.31, "learning_rate": 1.8394736842105266e-05, "loss": 0.244, "step": 1670 }, { "epoch": 0.31, "learning_rate": 1.831578947368421e-05, "loss": 0.2632, "step": 1680 }, { "epoch": 0.32, "learning_rate": 1.823684210526316e-05, "loss": 0.2756, "step": 1690 }, { "epoch": 0.32, "learning_rate": 1.8157894736842107e-05, "loss": 0.2555, "step": 1700 }, { "epoch": 0.32, "learning_rate": 1.8078947368421052e-05, "loss": 0.2863, "step": 1710 }, { "epoch": 0.32, "learning_rate": 1.8e-05, "loss": 0.2516, "step": 1720 }, { "epoch": 0.32, "learning_rate": 1.7921052631578945e-05, "loss": 0.2467, "step": 1730 }, { "epoch": 0.33, "learning_rate": 1.7842105263157894e-05, "loss": 0.2591, "step": 1740 }, { "epoch": 0.33, "learning_rate": 1.7763157894736842e-05, "loss": 0.2641, "step": 1750 }, { "epoch": 0.33, "learning_rate": 1.7684210526315787e-05, "loss": 0.2658, "step": 1760 }, { "epoch": 0.33, "learning_rate": 1.760526315789474e-05, "loss": 0.252, "step": 1770 }, { "epoch": 0.33, "learning_rate": 1.7526315789473687e-05, "loss": 0.2654, "step": 1780 }, { "epoch": 0.34, "learning_rate": 1.7447368421052632e-05, "loss": 0.2723, "step": 1790 }, { "epoch": 0.34, "learning_rate": 1.736842105263158e-05, "loss": 0.2716, "step": 1800 }, { "epoch": 0.34, "learning_rate": 1.728947368421053e-05, "loss": 0.2741, "step": 1810 }, { "epoch": 0.34, "learning_rate": 1.7210526315789473e-05, "loss": 0.2708, "step": 1820 }, { "epoch": 0.34, "learning_rate": 1.713157894736842e-05, "loss": 0.2484, "step": 1830 }, { "epoch": 0.34, "learning_rate": 1.705263157894737e-05, "loss": 0.2615, "step": 1840 }, { "epoch": 0.35, "learning_rate": 1.6973684210526315e-05, "loss": 0.2547, "step": 1850 }, { "epoch": 0.35, "learning_rate": 1.6894736842105263e-05, "loss": 0.264, "step": 1860 }, { "epoch": 0.35, "learning_rate": 1.681578947368421e-05, "loss": 0.2514, "step": 1870 }, { "epoch": 0.35, "learning_rate": 1.6736842105263156e-05, "loss": 0.2385, "step": 1880 }, { "epoch": 0.35, "learning_rate": 1.6657894736842105e-05, "loss": 0.2539, "step": 1890 }, { "epoch": 0.36, "learning_rate": 1.6578947368421053e-05, "loss": 0.24, "step": 1900 }, { "epoch": 0.36, "learning_rate": 1.65e-05, "loss": 0.2509, "step": 1910 }, { "epoch": 0.36, "learning_rate": 1.642105263157895e-05, "loss": 0.2573, "step": 1920 }, { "epoch": 0.36, "learning_rate": 1.6342105263157894e-05, "loss": 0.2427, "step": 1930 }, { "epoch": 0.36, "learning_rate": 1.6263157894736843e-05, "loss": 0.2572, "step": 1940 }, { "epoch": 0.36, "learning_rate": 1.618421052631579e-05, "loss": 0.2522, "step": 1950 }, { "epoch": 0.37, "learning_rate": 1.6105263157894736e-05, "loss": 0.2553, "step": 1960 }, { "epoch": 0.37, "learning_rate": 1.6026315789473684e-05, "loss": 0.2436, "step": 1970 }, { "epoch": 0.37, "learning_rate": 1.5947368421052633e-05, "loss": 0.2572, "step": 1980 }, { "epoch": 0.37, "learning_rate": 1.5868421052631578e-05, "loss": 0.2541, "step": 1990 }, { "epoch": 0.37, "learning_rate": 1.5789473684210526e-05, "loss": 0.2417, "step": 2000 }, { "epoch": 0.38, "learning_rate": 1.5710526315789474e-05, "loss": 0.2586, "step": 2010 }, { "epoch": 0.38, "learning_rate": 1.563157894736842e-05, "loss": 0.2383, "step": 2020 }, { "epoch": 0.38, "learning_rate": 1.5552631578947367e-05, "loss": 0.2497, "step": 2030 }, { "epoch": 0.38, "learning_rate": 1.547368421052632e-05, "loss": 0.2357, "step": 2040 }, { "epoch": 0.38, "learning_rate": 1.5394736842105264e-05, "loss": 0.26, "step": 2050 }, { "epoch": 0.39, "learning_rate": 1.5315789473684212e-05, "loss": 0.2442, "step": 2060 }, { "epoch": 0.39, "learning_rate": 1.5236842105263159e-05, "loss": 0.2269, "step": 2070 }, { "epoch": 0.39, "learning_rate": 1.5157894736842105e-05, "loss": 0.2419, "step": 2080 }, { "epoch": 0.39, "learning_rate": 1.5078947368421054e-05, "loss": 0.2442, "step": 2090 }, { "epoch": 0.39, "learning_rate": 1.5e-05, "loss": 0.2331, "step": 2100 }, { "epoch": 0.39, "learning_rate": 1.4921052631578947e-05, "loss": 0.2311, "step": 2110 }, { "epoch": 0.4, "learning_rate": 1.4842105263157895e-05, "loss": 0.2444, "step": 2120 }, { "epoch": 0.4, "learning_rate": 1.4763157894736842e-05, "loss": 0.2409, "step": 2130 }, { "epoch": 0.4, "learning_rate": 1.468421052631579e-05, "loss": 0.2365, "step": 2140 }, { "epoch": 0.4, "learning_rate": 1.4605263157894737e-05, "loss": 0.2517, "step": 2150 }, { "epoch": 0.4, "learning_rate": 1.4526315789473685e-05, "loss": 0.2562, "step": 2160 }, { "epoch": 0.41, "learning_rate": 1.4447368421052632e-05, "loss": 0.2233, "step": 2170 }, { "epoch": 0.41, "learning_rate": 1.4368421052631578e-05, "loss": 0.2496, "step": 2180 }, { "epoch": 0.41, "learning_rate": 1.4289473684210527e-05, "loss": 0.2452, "step": 2190 }, { "epoch": 0.41, "learning_rate": 1.4210526315789473e-05, "loss": 0.2374, "step": 2200 }, { "epoch": 0.41, "learning_rate": 1.4131578947368422e-05, "loss": 0.2233, "step": 2210 }, { "epoch": 0.42, "learning_rate": 1.405263157894737e-05, "loss": 0.2571, "step": 2220 }, { "epoch": 0.42, "learning_rate": 1.3973684210526316e-05, "loss": 0.2254, "step": 2230 }, { "epoch": 0.42, "learning_rate": 1.3894736842105263e-05, "loss": 0.2236, "step": 2240 }, { "epoch": 0.42, "learning_rate": 1.3815789473684211e-05, "loss": 0.2284, "step": 2250 }, { "epoch": 0.42, "learning_rate": 1.3736842105263158e-05, "loss": 0.232, "step": 2260 }, { "epoch": 0.42, "learning_rate": 1.3657894736842106e-05, "loss": 0.2415, "step": 2270 }, { "epoch": 0.43, "learning_rate": 1.3578947368421053e-05, "loss": 0.2244, "step": 2280 }, { "epoch": 0.43, "learning_rate": 1.3500000000000001e-05, "loss": 0.2338, "step": 2290 }, { "epoch": 0.43, "learning_rate": 1.3421052631578948e-05, "loss": 0.2382, "step": 2300 }, { "epoch": 0.43, "learning_rate": 1.3342105263157894e-05, "loss": 0.2349, "step": 2310 }, { "epoch": 0.43, "learning_rate": 1.3263157894736843e-05, "loss": 0.2267, "step": 2320 }, { "epoch": 0.44, "learning_rate": 1.318421052631579e-05, "loss": 0.2426, "step": 2330 }, { "epoch": 0.44, "learning_rate": 1.3105263157894738e-05, "loss": 0.238, "step": 2340 }, { "epoch": 0.44, "learning_rate": 1.3026315789473684e-05, "loss": 0.2332, "step": 2350 }, { "epoch": 0.44, "learning_rate": 1.2947368421052633e-05, "loss": 0.2332, "step": 2360 }, { "epoch": 0.44, "learning_rate": 1.2868421052631579e-05, "loss": 0.2477, "step": 2370 }, { "epoch": 0.45, "learning_rate": 1.2789473684210526e-05, "loss": 0.2176, "step": 2380 }, { "epoch": 0.45, "learning_rate": 1.2710526315789474e-05, "loss": 0.2177, "step": 2390 }, { "epoch": 0.45, "learning_rate": 1.263157894736842e-05, "loss": 0.2278, "step": 2400 }, { "epoch": 0.45, "learning_rate": 1.2552631578947369e-05, "loss": 0.2221, "step": 2410 }, { "epoch": 0.45, "learning_rate": 1.2473684210526317e-05, "loss": 0.2173, "step": 2420 }, { "epoch": 0.45, "learning_rate": 1.2394736842105264e-05, "loss": 0.2131, "step": 2430 }, { "epoch": 0.46, "learning_rate": 1.231578947368421e-05, "loss": 0.2346, "step": 2440 }, { "epoch": 0.46, "learning_rate": 1.2236842105263159e-05, "loss": 0.2301, "step": 2450 }, { "epoch": 0.46, "learning_rate": 1.2157894736842105e-05, "loss": 0.2173, "step": 2460 }, { "epoch": 0.46, "learning_rate": 1.2078947368421052e-05, "loss": 0.2297, "step": 2470 }, { "epoch": 0.46, "learning_rate": 1.2e-05, "loss": 0.2255, "step": 2480 }, { "epoch": 0.47, "learning_rate": 1.1921052631578949e-05, "loss": 0.2307, "step": 2490 }, { "epoch": 0.47, "learning_rate": 1.1842105263157895e-05, "loss": 0.222, "step": 2500 }, { "epoch": 0.47, "learning_rate": 1.1763157894736842e-05, "loss": 0.2319, "step": 2510 }, { "epoch": 0.47, "learning_rate": 1.168421052631579e-05, "loss": 0.228, "step": 2520 }, { "epoch": 0.47, "learning_rate": 1.1605263157894737e-05, "loss": 0.2267, "step": 2530 }, { "epoch": 0.48, "learning_rate": 1.1526315789473683e-05, "loss": 0.2163, "step": 2540 }, { "epoch": 0.48, "learning_rate": 1.1447368421052632e-05, "loss": 0.2167, "step": 2550 }, { "epoch": 0.48, "learning_rate": 1.136842105263158e-05, "loss": 0.1991, "step": 2560 }, { "epoch": 0.48, "learning_rate": 1.1289473684210527e-05, "loss": 0.2165, "step": 2570 }, { "epoch": 0.48, "learning_rate": 1.1210526315789473e-05, "loss": 0.2147, "step": 2580 }, { "epoch": 0.48, "learning_rate": 1.1131578947368421e-05, "loss": 0.2203, "step": 2590 }, { "epoch": 0.49, "learning_rate": 1.1052631578947368e-05, "loss": 0.2253, "step": 2600 }, { "epoch": 0.49, "learning_rate": 1.0973684210526316e-05, "loss": 0.2326, "step": 2610 }, { "epoch": 0.49, "learning_rate": 1.0894736842105265e-05, "loss": 0.221, "step": 2620 }, { "epoch": 0.49, "learning_rate": 1.0815789473684211e-05, "loss": 0.2233, "step": 2630 }, { "epoch": 0.49, "learning_rate": 1.0736842105263158e-05, "loss": 0.2199, "step": 2640 }, { "epoch": 0.5, "learning_rate": 1.0657894736842106e-05, "loss": 0.2077, "step": 2650 }, { "epoch": 0.5, "learning_rate": 1.0578947368421053e-05, "loss": 0.2108, "step": 2660 }, { "epoch": 0.5, "learning_rate": 1.05e-05, "loss": 0.2217, "step": 2670 }, { "epoch": 0.5, "learning_rate": 1.0421052631578948e-05, "loss": 0.2067, "step": 2680 }, { "epoch": 0.5, "learning_rate": 1.0342105263157896e-05, "loss": 0.2351, "step": 2690 }, { "epoch": 0.51, "learning_rate": 1.0263157894736843e-05, "loss": 0.1952, "step": 2700 }, { "epoch": 0.51, "learning_rate": 1.018421052631579e-05, "loss": 0.2259, "step": 2710 }, { "epoch": 0.51, "learning_rate": 1.0105263157894738e-05, "loss": 0.2074, "step": 2720 }, { "epoch": 0.51, "learning_rate": 1.0026315789473684e-05, "loss": 0.1963, "step": 2730 }, { "epoch": 0.51, "learning_rate": 9.94736842105263e-06, "loss": 0.2193, "step": 2740 }, { "epoch": 0.51, "learning_rate": 9.868421052631579e-06, "loss": 0.2172, "step": 2750 }, { "epoch": 0.52, "learning_rate": 9.789473684210527e-06, "loss": 0.2278, "step": 2760 }, { "epoch": 0.52, "learning_rate": 9.710526315789474e-06, "loss": 0.1933, "step": 2770 }, { "epoch": 0.52, "learning_rate": 9.63157894736842e-06, "loss": 0.2177, "step": 2780 }, { "epoch": 0.52, "learning_rate": 9.552631578947369e-06, "loss": 0.204, "step": 2790 }, { "epoch": 0.52, "learning_rate": 9.473684210526315e-06, "loss": 0.2178, "step": 2800 }, { "epoch": 0.53, "learning_rate": 9.394736842105262e-06, "loss": 0.2012, "step": 2810 }, { "epoch": 0.53, "learning_rate": 9.315789473684212e-06, "loss": 0.2054, "step": 2820 }, { "epoch": 0.53, "learning_rate": 9.236842105263159e-06, "loss": 0.2067, "step": 2830 }, { "epoch": 0.53, "learning_rate": 9.157894736842105e-06, "loss": 0.2132, "step": 2840 }, { "epoch": 0.53, "learning_rate": 9.078947368421054e-06, "loss": 0.2081, "step": 2850 }, { "epoch": 0.54, "learning_rate": 9e-06, "loss": 0.2033, "step": 2860 }, { "epoch": 0.54, "learning_rate": 8.921052631578947e-06, "loss": 0.2158, "step": 2870 }, { "epoch": 0.54, "learning_rate": 8.842105263157893e-06, "loss": 0.2043, "step": 2880 }, { "epoch": 0.54, "learning_rate": 8.763157894736843e-06, "loss": 0.1979, "step": 2890 }, { "epoch": 0.54, "learning_rate": 8.68421052631579e-06, "loss": 0.213, "step": 2900 }, { "epoch": 0.54, "learning_rate": 8.605263157894737e-06, "loss": 0.2042, "step": 2910 }, { "epoch": 0.55, "learning_rate": 8.526315789473685e-06, "loss": 0.2067, "step": 2920 }, { "epoch": 0.55, "learning_rate": 8.447368421052632e-06, "loss": 0.2052, "step": 2930 }, { "epoch": 0.55, "learning_rate": 8.368421052631578e-06, "loss": 0.2036, "step": 2940 }, { "epoch": 0.55, "learning_rate": 8.289473684210526e-06, "loss": 0.2224, "step": 2950 }, { "epoch": 0.55, "learning_rate": 8.210526315789475e-06, "loss": 0.2106, "step": 2960 }, { "epoch": 0.56, "learning_rate": 8.131578947368421e-06, "loss": 0.1956, "step": 2970 }, { "epoch": 0.56, "learning_rate": 8.052631578947368e-06, "loss": 0.1973, "step": 2980 }, { "epoch": 0.56, "learning_rate": 7.973684210526316e-06, "loss": 0.2103, "step": 2990 }, { "epoch": 0.56, "learning_rate": 7.894736842105263e-06, "loss": 0.2011, "step": 3000 }, { "epoch": 0.56, "learning_rate": 7.81578947368421e-06, "loss": 0.2085, "step": 3010 }, { "epoch": 0.57, "learning_rate": 7.73684210526316e-06, "loss": 0.2042, "step": 3020 }, { "epoch": 0.57, "learning_rate": 7.657894736842106e-06, "loss": 0.2123, "step": 3030 }, { "epoch": 0.57, "learning_rate": 7.578947368421053e-06, "loss": 0.1853, "step": 3040 }, { "epoch": 0.57, "learning_rate": 7.5e-06, "loss": 0.1943, "step": 3050 }, { "epoch": 0.57, "learning_rate": 7.421052631578948e-06, "loss": 0.2049, "step": 3060 }, { "epoch": 0.57, "learning_rate": 7.342105263157895e-06, "loss": 0.1986, "step": 3070 }, { "epoch": 0.58, "learning_rate": 7.2631578947368426e-06, "loss": 0.1912, "step": 3080 }, { "epoch": 0.58, "learning_rate": 7.184210526315789e-06, "loss": 0.2061, "step": 3090 }, { "epoch": 0.58, "learning_rate": 7.105263157894737e-06, "loss": 0.1968, "step": 3100 }, { "epoch": 0.58, "learning_rate": 7.026315789473685e-06, "loss": 0.1827, "step": 3110 }, { "epoch": 0.58, "learning_rate": 6.9473684210526315e-06, "loss": 0.2016, "step": 3120 }, { "epoch": 0.59, "learning_rate": 6.868421052631579e-06, "loss": 0.2212, "step": 3130 }, { "epoch": 0.59, "learning_rate": 6.7894736842105264e-06, "loss": 0.2108, "step": 3140 }, { "epoch": 0.59, "learning_rate": 6.710526315789474e-06, "loss": 0.2121, "step": 3150 }, { "epoch": 0.59, "learning_rate": 6.631578947368421e-06, "loss": 0.1888, "step": 3160 }, { "epoch": 0.59, "learning_rate": 6.552631578947369e-06, "loss": 0.2018, "step": 3170 }, { "epoch": 0.6, "learning_rate": 6.473684210526316e-06, "loss": 0.2049, "step": 3180 }, { "epoch": 0.6, "learning_rate": 6.394736842105263e-06, "loss": 0.2084, "step": 3190 }, { "epoch": 0.6, "learning_rate": 6.31578947368421e-06, "loss": 0.188, "step": 3200 }, { "epoch": 0.6, "learning_rate": 6.236842105263159e-06, "loss": 0.1863, "step": 3210 }, { "epoch": 0.6, "learning_rate": 6.157894736842105e-06, "loss": 0.2082, "step": 3220 }, { "epoch": 0.6, "learning_rate": 6.078947368421053e-06, "loss": 0.2047, "step": 3230 }, { "epoch": 0.61, "learning_rate": 6e-06, "loss": 0.208, "step": 3240 }, { "epoch": 0.61, "learning_rate": 5.921052631578948e-06, "loss": 0.2067, "step": 3250 }, { "epoch": 0.61, "learning_rate": 5.842105263157895e-06, "loss": 0.2032, "step": 3260 }, { "epoch": 0.61, "learning_rate": 5.763157894736842e-06, "loss": 0.1891, "step": 3270 }, { "epoch": 0.61, "learning_rate": 5.68421052631579e-06, "loss": 0.176, "step": 3280 }, { "epoch": 0.62, "learning_rate": 5.605263157894737e-06, "loss": 0.2063, "step": 3290 }, { "epoch": 0.62, "learning_rate": 5.526315789473684e-06, "loss": 0.2006, "step": 3300 }, { "epoch": 0.62, "learning_rate": 5.447368421052632e-06, "loss": 0.2016, "step": 3310 }, { "epoch": 0.62, "learning_rate": 5.368421052631579e-06, "loss": 0.1875, "step": 3320 }, { "epoch": 0.62, "learning_rate": 5.289473684210526e-06, "loss": 0.1951, "step": 3330 }, { "epoch": 0.63, "learning_rate": 5.210526315789474e-06, "loss": 0.2017, "step": 3340 }, { "epoch": 0.63, "learning_rate": 5.131578947368421e-06, "loss": 0.2072, "step": 3350 }, { "epoch": 0.63, "learning_rate": 5.052631578947369e-06, "loss": 0.1919, "step": 3360 }, { "epoch": 0.63, "learning_rate": 4.973684210526315e-06, "loss": 0.2072, "step": 3370 }, { "epoch": 0.63, "learning_rate": 4.894736842105264e-06, "loss": 0.1955, "step": 3380 }, { "epoch": 0.63, "learning_rate": 4.81578947368421e-06, "loss": 0.2037, "step": 3390 }, { "epoch": 0.64, "learning_rate": 4.736842105263158e-06, "loss": 0.1891, "step": 3400 }, { "epoch": 0.64, "learning_rate": 4.657894736842106e-06, "loss": 0.1814, "step": 3410 }, { "epoch": 0.64, "learning_rate": 4.578947368421053e-06, "loss": 0.1737, "step": 3420 }, { "epoch": 0.64, "learning_rate": 4.5e-06, "loss": 0.2123, "step": 3430 }, { "epoch": 0.64, "learning_rate": 4.421052631578947e-06, "loss": 0.1984, "step": 3440 }, { "epoch": 0.65, "learning_rate": 4.342105263157895e-06, "loss": 0.1857, "step": 3450 }, { "epoch": 0.65, "learning_rate": 4.2631578947368425e-06, "loss": 0.1855, "step": 3460 }, { "epoch": 0.65, "learning_rate": 4.184210526315789e-06, "loss": 0.1993, "step": 3470 }, { "epoch": 0.65, "learning_rate": 4.105263157894737e-06, "loss": 0.1951, "step": 3480 }, { "epoch": 0.65, "learning_rate": 4.026315789473684e-06, "loss": 0.1976, "step": 3490 }, { "epoch": 0.66, "learning_rate": 3.9473684210526315e-06, "loss": 0.1848, "step": 3500 }, { "epoch": 0.66, "learning_rate": 3.86842105263158e-06, "loss": 0.2031, "step": 3510 }, { "epoch": 0.66, "learning_rate": 3.7894736842105264e-06, "loss": 0.1773, "step": 3520 }, { "epoch": 0.66, "learning_rate": 3.710526315789474e-06, "loss": 0.1955, "step": 3530 }, { "epoch": 0.66, "learning_rate": 3.6315789473684213e-06, "loss": 0.1836, "step": 3540 }, { "epoch": 0.66, "learning_rate": 3.5526315789473683e-06, "loss": 0.199, "step": 3550 }, { "epoch": 0.67, "learning_rate": 3.4736842105263158e-06, "loss": 0.1827, "step": 3560 }, { "epoch": 0.67, "learning_rate": 3.3947368421052632e-06, "loss": 0.1917, "step": 3570 }, { "epoch": 0.67, "learning_rate": 3.3157894736842107e-06, "loss": 0.1849, "step": 3580 }, { "epoch": 0.67, "learning_rate": 3.236842105263158e-06, "loss": 0.1913, "step": 3590 }, { "epoch": 0.67, "learning_rate": 3.157894736842105e-06, "loss": 0.1822, "step": 3600 }, { "epoch": 0.68, "learning_rate": 3.0789473684210526e-06, "loss": 0.2005, "step": 3610 }, { "epoch": 0.68, "learning_rate": 3e-06, "loss": 0.1969, "step": 3620 }, { "epoch": 0.68, "learning_rate": 2.9210526315789475e-06, "loss": 0.184, "step": 3630 }, { "epoch": 0.68, "learning_rate": 2.842105263157895e-06, "loss": 0.1829, "step": 3640 }, { "epoch": 0.68, "learning_rate": 2.763157894736842e-06, "loss": 0.1856, "step": 3650 }, { "epoch": 0.69, "learning_rate": 2.6842105263157895e-06, "loss": 0.18, "step": 3660 }, { "epoch": 0.69, "learning_rate": 2.605263157894737e-06, "loss": 0.1794, "step": 3670 }, { "epoch": 0.69, "learning_rate": 2.5263157894736844e-06, "loss": 0.1892, "step": 3680 }, { "epoch": 0.69, "learning_rate": 2.447368421052632e-06, "loss": 0.1953, "step": 3690 }, { "epoch": 0.69, "learning_rate": 2.368421052631579e-06, "loss": 0.1996, "step": 3700 }, { "epoch": 0.69, "learning_rate": 2.2894736842105263e-06, "loss": 0.1781, "step": 3710 }, { "epoch": 0.7, "learning_rate": 2.2105263157894734e-06, "loss": 0.1884, "step": 3720 }, { "epoch": 0.7, "learning_rate": 2.1315789473684212e-06, "loss": 0.1829, "step": 3730 }, { "epoch": 0.7, "learning_rate": 2.0526315789473687e-06, "loss": 0.1853, "step": 3740 }, { "epoch": 0.7, "learning_rate": 1.9736842105263157e-06, "loss": 0.1999, "step": 3750 }, { "epoch": 0.7, "learning_rate": 1.8947368421052632e-06, "loss": 0.181, "step": 3760 }, { "epoch": 0.71, "learning_rate": 1.8157894736842106e-06, "loss": 0.1769, "step": 3770 }, { "epoch": 0.71, "learning_rate": 1.7368421052631579e-06, "loss": 0.1965, "step": 3780 }, { "epoch": 0.71, "learning_rate": 1.6578947368421053e-06, "loss": 0.1866, "step": 3790 }, { "epoch": 0.71, "learning_rate": 1.5789473684210526e-06, "loss": 0.1848, "step": 3800 }, { "epoch": 0.71, "learning_rate": 1.5e-06, "loss": 0.204, "step": 3810 }, { "epoch": 0.71, "learning_rate": 1.4210526315789475e-06, "loss": 0.1807, "step": 3820 }, { "epoch": 0.72, "learning_rate": 1.3421052631578947e-06, "loss": 0.1858, "step": 3830 }, { "epoch": 0.72, "learning_rate": 1.2631578947368422e-06, "loss": 0.1953, "step": 3840 }, { "epoch": 0.72, "learning_rate": 1.1842105263157894e-06, "loss": 0.1793, "step": 3850 }, { "epoch": 0.72, "learning_rate": 1.1052631578947367e-06, "loss": 0.1987, "step": 3860 }, { "epoch": 0.72, "learning_rate": 1.0263157894736843e-06, "loss": 0.1857, "step": 3870 }, { "epoch": 0.73, "learning_rate": 9.473684210526316e-07, "loss": 0.1767, "step": 3880 }, { "epoch": 0.73, "learning_rate": 8.684210526315789e-07, "loss": 0.1784, "step": 3890 }, { "epoch": 0.73, "learning_rate": 7.894736842105263e-07, "loss": 0.1657, "step": 3900 }, { "epoch": 0.73, "learning_rate": 7.105263157894737e-07, "loss": 0.1801, "step": 3910 }, { "epoch": 0.73, "learning_rate": 6.315789473684211e-07, "loss": 0.1853, "step": 3920 }, { "epoch": 0.74, "learning_rate": 5.526315789473683e-07, "loss": 0.1843, "step": 3930 }, { "epoch": 0.74, "learning_rate": 4.736842105263158e-07, "loss": 0.2039, "step": 3940 }, { "epoch": 0.74, "learning_rate": 3.9473684210526315e-07, "loss": 0.2031, "step": 3950 }, { "epoch": 0.74, "learning_rate": 3.1578947368421055e-07, "loss": 0.1851, "step": 3960 }, { "epoch": 0.74, "learning_rate": 2.368421052631579e-07, "loss": 0.1765, "step": 3970 }, { "epoch": 0.74, "learning_rate": 1.5789473684210527e-07, "loss": 0.1724, "step": 3980 }, { "epoch": 0.75, "learning_rate": 7.894736842105264e-08, "loss": 0.1824, "step": 3990 }, { "epoch": 0.75, "learning_rate": 0.0, "loss": 0.1716, "step": 4000 } ], "max_steps": 4000, "num_train_epochs": 1, "total_flos": 0.0, "trial_name": null, "trial_params": null }