{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.14381040036815462, "eval_steps": 500, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.00019920000000000002, "loss": 1.0145, "step": 10 }, { "epoch": 0.0, "learning_rate": 0.0001984, "loss": 0.9763, "step": 20 }, { "epoch": 0.0, "learning_rate": 0.0001976, "loss": 0.9834, "step": 30 }, { "epoch": 0.0, "learning_rate": 0.0001968, "loss": 1.0162, "step": 40 }, { "epoch": 0.0, "learning_rate": 0.000196, "loss": 0.8783, "step": 50 }, { "epoch": 0.0, "learning_rate": 0.0001952, "loss": 1.0353, "step": 60 }, { "epoch": 0.0, "learning_rate": 0.0001944, "loss": 1.0571, "step": 70 }, { "epoch": 0.0, "learning_rate": 0.00019360000000000002, "loss": 0.9313, "step": 80 }, { "epoch": 0.01, "learning_rate": 0.0001928, "loss": 0.9828, "step": 90 }, { "epoch": 0.01, "learning_rate": 0.000192, "loss": 1.0549, "step": 100 }, { "epoch": 0.01, "learning_rate": 0.0001912, "loss": 1.0297, "step": 110 }, { "epoch": 0.01, "learning_rate": 0.0001904, "loss": 0.8975, "step": 120 }, { "epoch": 0.01, "learning_rate": 0.0001896, "loss": 0.9403, "step": 130 }, { "epoch": 0.01, "learning_rate": 0.0001888, "loss": 1.0143, "step": 140 }, { "epoch": 0.01, "learning_rate": 0.000188, "loss": 0.9905, "step": 150 }, { "epoch": 0.01, "learning_rate": 0.00018720000000000002, "loss": 0.931, "step": 160 }, { "epoch": 0.01, "learning_rate": 0.00018640000000000003, "loss": 1.0687, "step": 170 }, { "epoch": 0.01, "learning_rate": 0.0001856, "loss": 1.1008, "step": 180 }, { "epoch": 0.01, "learning_rate": 0.00018480000000000002, "loss": 1.0267, "step": 190 }, { "epoch": 0.01, "learning_rate": 0.00018400000000000003, "loss": 0.9282, "step": 200 }, { "epoch": 0.01, "learning_rate": 0.0001832, "loss": 0.9845, "step": 210 }, { "epoch": 0.01, "learning_rate": 0.00018240000000000002, "loss": 1.0082, "step": 220 }, { "epoch": 0.01, "learning_rate": 0.00018160000000000002, "loss": 0.9368, "step": 230 }, { "epoch": 0.01, "learning_rate": 0.0001808, "loss": 1.0412, "step": 240 }, { "epoch": 0.01, "learning_rate": 0.00018, "loss": 0.9716, "step": 250 }, { "epoch": 0.01, "learning_rate": 0.00017920000000000002, "loss": 0.978, "step": 260 }, { "epoch": 0.02, "learning_rate": 0.0001784, "loss": 1.0301, "step": 270 }, { "epoch": 0.02, "learning_rate": 0.0001776, "loss": 1.0605, "step": 280 }, { "epoch": 0.02, "learning_rate": 0.00017680000000000001, "loss": 0.9383, "step": 290 }, { "epoch": 0.02, "learning_rate": 0.00017600000000000002, "loss": 1.0683, "step": 300 }, { "epoch": 0.02, "learning_rate": 0.0001752, "loss": 1.0155, "step": 310 }, { "epoch": 0.02, "learning_rate": 0.0001744, "loss": 0.9944, "step": 320 }, { "epoch": 0.02, "learning_rate": 0.00017360000000000002, "loss": 0.9694, "step": 330 }, { "epoch": 0.02, "learning_rate": 0.0001728, "loss": 1.0904, "step": 340 }, { "epoch": 0.02, "learning_rate": 0.000172, "loss": 1.0099, "step": 350 }, { "epoch": 0.02, "learning_rate": 0.00017120000000000001, "loss": 0.9639, "step": 360 }, { "epoch": 0.02, "learning_rate": 0.0001704, "loss": 0.9827, "step": 370 }, { "epoch": 0.02, "learning_rate": 0.0001696, "loss": 0.9929, "step": 380 }, { "epoch": 0.02, "learning_rate": 0.0001688, "loss": 1.0025, "step": 390 }, { "epoch": 0.02, "learning_rate": 0.000168, "loss": 1.0533, "step": 400 }, { "epoch": 0.02, "learning_rate": 0.0001672, "loss": 0.9881, "step": 410 }, { "epoch": 0.02, "learning_rate": 0.0001664, "loss": 1.0109, "step": 420 }, { "epoch": 0.02, "learning_rate": 0.0001656, "loss": 0.9747, "step": 430 }, { "epoch": 0.03, "learning_rate": 0.0001648, "loss": 1.0304, "step": 440 }, { "epoch": 0.03, "learning_rate": 0.000164, "loss": 1.0282, "step": 450 }, { "epoch": 0.03, "learning_rate": 0.0001632, "loss": 0.9555, "step": 460 }, { "epoch": 0.03, "learning_rate": 0.00016240000000000002, "loss": 1.0386, "step": 470 }, { "epoch": 0.03, "learning_rate": 0.00016160000000000002, "loss": 0.931, "step": 480 }, { "epoch": 0.03, "learning_rate": 0.0001608, "loss": 0.9508, "step": 490 }, { "epoch": 0.03, "learning_rate": 0.00016, "loss": 0.9486, "step": 500 }, { "epoch": 0.03, "learning_rate": 0.00015920000000000002, "loss": 0.9454, "step": 510 }, { "epoch": 0.03, "learning_rate": 0.00015840000000000003, "loss": 1.0293, "step": 520 }, { "epoch": 0.03, "learning_rate": 0.0001576, "loss": 1.0367, "step": 530 }, { "epoch": 0.03, "learning_rate": 0.00015680000000000002, "loss": 1.1525, "step": 540 }, { "epoch": 0.03, "learning_rate": 0.00015600000000000002, "loss": 1.0292, "step": 550 }, { "epoch": 0.03, "learning_rate": 0.0001552, "loss": 1.0465, "step": 560 }, { "epoch": 0.03, "learning_rate": 0.0001544, "loss": 0.9291, "step": 570 }, { "epoch": 0.03, "learning_rate": 0.00015360000000000002, "loss": 1.0446, "step": 580 }, { "epoch": 0.03, "learning_rate": 0.0001528, "loss": 0.9587, "step": 590 }, { "epoch": 0.03, "learning_rate": 0.000152, "loss": 0.9981, "step": 600 }, { "epoch": 0.04, "learning_rate": 0.00015120000000000002, "loss": 1.1322, "step": 610 }, { "epoch": 0.04, "learning_rate": 0.0001504, "loss": 0.9966, "step": 620 }, { "epoch": 0.04, "learning_rate": 0.0001496, "loss": 1.076, "step": 630 }, { "epoch": 0.04, "learning_rate": 0.0001488, "loss": 0.9545, "step": 640 }, { "epoch": 0.04, "learning_rate": 0.000148, "loss": 0.9752, "step": 650 }, { "epoch": 0.04, "learning_rate": 0.0001472, "loss": 0.9336, "step": 660 }, { "epoch": 0.04, "learning_rate": 0.0001464, "loss": 0.9587, "step": 670 }, { "epoch": 0.04, "learning_rate": 0.00014560000000000002, "loss": 1.0586, "step": 680 }, { "epoch": 0.04, "learning_rate": 0.0001448, "loss": 1.0869, "step": 690 }, { "epoch": 0.04, "learning_rate": 0.000144, "loss": 0.9833, "step": 700 }, { "epoch": 0.04, "learning_rate": 0.0001432, "loss": 1.0594, "step": 710 }, { "epoch": 0.04, "learning_rate": 0.0001424, "loss": 1.1646, "step": 720 }, { "epoch": 0.04, "learning_rate": 0.0001416, "loss": 0.954, "step": 730 }, { "epoch": 0.04, "learning_rate": 0.0001408, "loss": 0.9845, "step": 740 }, { "epoch": 0.04, "learning_rate": 0.00014, "loss": 1.0662, "step": 750 }, { "epoch": 0.04, "learning_rate": 0.0001392, "loss": 1.0889, "step": 760 }, { "epoch": 0.04, "learning_rate": 0.0001384, "loss": 1.0045, "step": 770 }, { "epoch": 0.04, "learning_rate": 0.00013759999999999998, "loss": 0.9918, "step": 780 }, { "epoch": 0.05, "learning_rate": 0.00013680000000000002, "loss": 0.9599, "step": 790 }, { "epoch": 0.05, "learning_rate": 0.00013600000000000003, "loss": 0.9759, "step": 800 }, { "epoch": 0.05, "learning_rate": 0.0001352, "loss": 0.9278, "step": 810 }, { "epoch": 0.05, "learning_rate": 0.00013440000000000001, "loss": 1.0274, "step": 820 }, { "epoch": 0.05, "learning_rate": 0.00013360000000000002, "loss": 1.0576, "step": 830 }, { "epoch": 0.05, "learning_rate": 0.0001328, "loss": 0.9719, "step": 840 }, { "epoch": 0.05, "learning_rate": 0.000132, "loss": 0.9621, "step": 850 }, { "epoch": 0.05, "learning_rate": 0.00013120000000000002, "loss": 0.9488, "step": 860 }, { "epoch": 0.05, "learning_rate": 0.0001304, "loss": 1.0323, "step": 870 }, { "epoch": 0.05, "learning_rate": 0.0001296, "loss": 1.0105, "step": 880 }, { "epoch": 0.05, "learning_rate": 0.00012880000000000001, "loss": 0.9062, "step": 890 }, { "epoch": 0.05, "learning_rate": 0.00012800000000000002, "loss": 1.009, "step": 900 }, { "epoch": 0.05, "learning_rate": 0.0001272, "loss": 0.9946, "step": 910 }, { "epoch": 0.05, "learning_rate": 0.0001264, "loss": 1.0547, "step": 920 }, { "epoch": 0.05, "learning_rate": 0.00012560000000000002, "loss": 0.9743, "step": 930 }, { "epoch": 0.05, "learning_rate": 0.0001248, "loss": 1.0, "step": 940 }, { "epoch": 0.05, "learning_rate": 0.000124, "loss": 0.9044, "step": 950 }, { "epoch": 0.06, "learning_rate": 0.0001232, "loss": 0.962, "step": 960 }, { "epoch": 0.06, "learning_rate": 0.0001224, "loss": 1.0243, "step": 970 }, { "epoch": 0.06, "learning_rate": 0.0001216, "loss": 1.0634, "step": 980 }, { "epoch": 0.06, "learning_rate": 0.0001208, "loss": 1.0166, "step": 990 }, { "epoch": 0.06, "learning_rate": 0.00012, "loss": 0.9862, "step": 1000 }, { "epoch": 0.06, "learning_rate": 0.0001192, "loss": 1.0362, "step": 1010 }, { "epoch": 0.06, "learning_rate": 0.0001184, "loss": 0.966, "step": 1020 }, { "epoch": 0.06, "learning_rate": 0.0001176, "loss": 1.0006, "step": 1030 }, { "epoch": 0.06, "learning_rate": 0.00011679999999999999, "loss": 1.0357, "step": 1040 }, { "epoch": 0.06, "learning_rate": 0.000116, "loss": 0.913, "step": 1050 }, { "epoch": 0.06, "learning_rate": 0.0001152, "loss": 0.9778, "step": 1060 }, { "epoch": 0.06, "learning_rate": 0.0001144, "loss": 0.9639, "step": 1070 }, { "epoch": 0.06, "learning_rate": 0.0001136, "loss": 0.9868, "step": 1080 }, { "epoch": 0.06, "learning_rate": 0.00011279999999999999, "loss": 0.9675, "step": 1090 }, { "epoch": 0.06, "learning_rate": 0.00011200000000000001, "loss": 0.9512, "step": 1100 }, { "epoch": 0.06, "learning_rate": 0.00011120000000000002, "loss": 0.9702, "step": 1110 }, { "epoch": 0.06, "learning_rate": 0.00011040000000000001, "loss": 1.0048, "step": 1120 }, { "epoch": 0.07, "learning_rate": 0.00010960000000000001, "loss": 1.0567, "step": 1130 }, { "epoch": 0.07, "learning_rate": 0.00010880000000000002, "loss": 1.0748, "step": 1140 }, { "epoch": 0.07, "learning_rate": 0.00010800000000000001, "loss": 1.0211, "step": 1150 }, { "epoch": 0.07, "learning_rate": 0.00010720000000000002, "loss": 1.0302, "step": 1160 }, { "epoch": 0.07, "learning_rate": 0.00010640000000000001, "loss": 0.95, "step": 1170 }, { "epoch": 0.07, "learning_rate": 0.0001056, "loss": 0.9633, "step": 1180 }, { "epoch": 0.07, "learning_rate": 0.00010480000000000001, "loss": 0.953, "step": 1190 }, { "epoch": 0.07, "learning_rate": 0.00010400000000000001, "loss": 1.1539, "step": 1200 }, { "epoch": 0.07, "learning_rate": 0.0001032, "loss": 1.0204, "step": 1210 }, { "epoch": 0.07, "learning_rate": 0.00010240000000000001, "loss": 1.0057, "step": 1220 }, { "epoch": 0.07, "learning_rate": 0.0001016, "loss": 0.979, "step": 1230 }, { "epoch": 0.07, "learning_rate": 0.00010080000000000001, "loss": 0.9177, "step": 1240 }, { "epoch": 0.07, "learning_rate": 0.0001, "loss": 0.9689, "step": 1250 }, { "epoch": 0.07, "learning_rate": 9.92e-05, "loss": 1.0309, "step": 1260 }, { "epoch": 0.07, "learning_rate": 9.84e-05, "loss": 0.9808, "step": 1270 }, { "epoch": 0.07, "learning_rate": 9.76e-05, "loss": 0.9918, "step": 1280 }, { "epoch": 0.07, "learning_rate": 9.680000000000001e-05, "loss": 0.9972, "step": 1290 }, { "epoch": 0.07, "learning_rate": 9.6e-05, "loss": 1.0096, "step": 1300 }, { "epoch": 0.08, "learning_rate": 9.52e-05, "loss": 0.9938, "step": 1310 }, { "epoch": 0.08, "learning_rate": 9.44e-05, "loss": 1.0777, "step": 1320 }, { "epoch": 0.08, "learning_rate": 9.360000000000001e-05, "loss": 0.9023, "step": 1330 }, { "epoch": 0.08, "learning_rate": 9.28e-05, "loss": 0.9703, "step": 1340 }, { "epoch": 0.08, "learning_rate": 9.200000000000001e-05, "loss": 0.9471, "step": 1350 }, { "epoch": 0.08, "learning_rate": 9.120000000000001e-05, "loss": 0.9502, "step": 1360 }, { "epoch": 0.08, "learning_rate": 9.04e-05, "loss": 0.9785, "step": 1370 }, { "epoch": 0.08, "learning_rate": 8.960000000000001e-05, "loss": 0.9209, "step": 1380 }, { "epoch": 0.08, "learning_rate": 8.88e-05, "loss": 0.9626, "step": 1390 }, { "epoch": 0.08, "learning_rate": 8.800000000000001e-05, "loss": 0.9381, "step": 1400 }, { "epoch": 0.08, "learning_rate": 8.72e-05, "loss": 0.9848, "step": 1410 }, { "epoch": 0.08, "learning_rate": 8.64e-05, "loss": 0.988, "step": 1420 }, { "epoch": 0.08, "learning_rate": 8.560000000000001e-05, "loss": 0.9534, "step": 1430 }, { "epoch": 0.08, "learning_rate": 8.48e-05, "loss": 0.9494, "step": 1440 }, { "epoch": 0.08, "learning_rate": 8.4e-05, "loss": 0.9913, "step": 1450 }, { "epoch": 0.08, "learning_rate": 8.32e-05, "loss": 0.9673, "step": 1460 }, { "epoch": 0.08, "learning_rate": 8.24e-05, "loss": 1.0535, "step": 1470 }, { "epoch": 0.09, "learning_rate": 8.16e-05, "loss": 1.0376, "step": 1480 }, { "epoch": 0.09, "learning_rate": 8.080000000000001e-05, "loss": 1.0549, "step": 1490 }, { "epoch": 0.09, "learning_rate": 8e-05, "loss": 1.1231, "step": 1500 }, { "epoch": 0.09, "learning_rate": 7.920000000000001e-05, "loss": 0.9429, "step": 1510 }, { "epoch": 0.09, "learning_rate": 7.840000000000001e-05, "loss": 0.9748, "step": 1520 }, { "epoch": 0.09, "learning_rate": 7.76e-05, "loss": 1.1094, "step": 1530 }, { "epoch": 0.09, "learning_rate": 7.680000000000001e-05, "loss": 1.0432, "step": 1540 }, { "epoch": 0.09, "learning_rate": 7.6e-05, "loss": 0.9773, "step": 1550 }, { "epoch": 0.09, "learning_rate": 7.52e-05, "loss": 1.0378, "step": 1560 }, { "epoch": 0.09, "learning_rate": 7.44e-05, "loss": 1.0792, "step": 1570 }, { "epoch": 0.09, "learning_rate": 7.36e-05, "loss": 0.9189, "step": 1580 }, { "epoch": 0.09, "learning_rate": 7.280000000000001e-05, "loss": 1.0653, "step": 1590 }, { "epoch": 0.09, "learning_rate": 7.2e-05, "loss": 1.021, "step": 1600 }, { "epoch": 0.09, "learning_rate": 7.12e-05, "loss": 1.1548, "step": 1610 }, { "epoch": 0.09, "learning_rate": 7.04e-05, "loss": 1.0094, "step": 1620 }, { "epoch": 0.09, "learning_rate": 6.96e-05, "loss": 0.9691, "step": 1630 }, { "epoch": 0.09, "learning_rate": 6.879999999999999e-05, "loss": 0.9898, "step": 1640 }, { "epoch": 0.09, "learning_rate": 6.800000000000001e-05, "loss": 1.0263, "step": 1650 }, { "epoch": 0.1, "learning_rate": 6.720000000000001e-05, "loss": 0.9948, "step": 1660 }, { "epoch": 0.1, "learning_rate": 6.64e-05, "loss": 0.9809, "step": 1670 }, { "epoch": 0.1, "learning_rate": 6.560000000000001e-05, "loss": 1.0254, "step": 1680 }, { "epoch": 0.1, "learning_rate": 6.48e-05, "loss": 0.9775, "step": 1690 }, { "epoch": 0.1, "learning_rate": 6.400000000000001e-05, "loss": 1.0165, "step": 1700 }, { "epoch": 0.1, "learning_rate": 6.32e-05, "loss": 0.987, "step": 1710 }, { "epoch": 0.1, "learning_rate": 6.24e-05, "loss": 0.9548, "step": 1720 }, { "epoch": 0.1, "learning_rate": 6.16e-05, "loss": 0.982, "step": 1730 }, { "epoch": 0.1, "learning_rate": 6.08e-05, "loss": 1.1186, "step": 1740 }, { "epoch": 0.1, "learning_rate": 6e-05, "loss": 1.0086, "step": 1750 }, { "epoch": 0.1, "learning_rate": 5.92e-05, "loss": 1.0121, "step": 1760 }, { "epoch": 0.1, "learning_rate": 5.8399999999999997e-05, "loss": 1.0157, "step": 1770 }, { "epoch": 0.1, "learning_rate": 5.76e-05, "loss": 0.9987, "step": 1780 }, { "epoch": 0.1, "learning_rate": 5.68e-05, "loss": 1.0014, "step": 1790 }, { "epoch": 0.1, "learning_rate": 5.6000000000000006e-05, "loss": 1.0788, "step": 1800 }, { "epoch": 0.1, "learning_rate": 5.520000000000001e-05, "loss": 1.0208, "step": 1810 }, { "epoch": 0.1, "learning_rate": 5.440000000000001e-05, "loss": 1.0551, "step": 1820 }, { "epoch": 0.11, "learning_rate": 5.360000000000001e-05, "loss": 1.0565, "step": 1830 }, { "epoch": 0.11, "learning_rate": 5.28e-05, "loss": 1.0127, "step": 1840 }, { "epoch": 0.11, "learning_rate": 5.2000000000000004e-05, "loss": 0.9067, "step": 1850 }, { "epoch": 0.11, "learning_rate": 5.1200000000000004e-05, "loss": 0.9731, "step": 1860 }, { "epoch": 0.11, "learning_rate": 5.0400000000000005e-05, "loss": 1.005, "step": 1870 }, { "epoch": 0.11, "learning_rate": 4.96e-05, "loss": 1.0038, "step": 1880 }, { "epoch": 0.11, "learning_rate": 4.88e-05, "loss": 0.9671, "step": 1890 }, { "epoch": 0.11, "learning_rate": 4.8e-05, "loss": 0.9792, "step": 1900 }, { "epoch": 0.11, "learning_rate": 4.72e-05, "loss": 0.9125, "step": 1910 }, { "epoch": 0.11, "learning_rate": 4.64e-05, "loss": 1.0566, "step": 1920 }, { "epoch": 0.11, "learning_rate": 4.5600000000000004e-05, "loss": 0.9108, "step": 1930 }, { "epoch": 0.11, "learning_rate": 4.4800000000000005e-05, "loss": 1.0017, "step": 1940 }, { "epoch": 0.11, "learning_rate": 4.4000000000000006e-05, "loss": 0.96, "step": 1950 }, { "epoch": 0.11, "learning_rate": 4.32e-05, "loss": 0.9096, "step": 1960 }, { "epoch": 0.11, "learning_rate": 4.24e-05, "loss": 0.9037, "step": 1970 }, { "epoch": 0.11, "learning_rate": 4.16e-05, "loss": 0.9881, "step": 1980 }, { "epoch": 0.11, "learning_rate": 4.08e-05, "loss": 0.9556, "step": 1990 }, { "epoch": 0.12, "learning_rate": 4e-05, "loss": 0.9572, "step": 2000 }, { "epoch": 0.12, "learning_rate": 3.9200000000000004e-05, "loss": 1.0412, "step": 2010 }, { "epoch": 0.12, "learning_rate": 3.8400000000000005e-05, "loss": 1.0179, "step": 2020 }, { "epoch": 0.12, "learning_rate": 3.76e-05, "loss": 1.032, "step": 2030 }, { "epoch": 0.12, "learning_rate": 3.68e-05, "loss": 0.9593, "step": 2040 }, { "epoch": 0.12, "learning_rate": 3.6e-05, "loss": 0.9784, "step": 2050 }, { "epoch": 0.12, "learning_rate": 3.52e-05, "loss": 0.9205, "step": 2060 }, { "epoch": 0.12, "learning_rate": 3.4399999999999996e-05, "loss": 0.98, "step": 2070 }, { "epoch": 0.12, "learning_rate": 3.3600000000000004e-05, "loss": 1.122, "step": 2080 }, { "epoch": 0.12, "learning_rate": 3.2800000000000004e-05, "loss": 0.9987, "step": 2090 }, { "epoch": 0.12, "learning_rate": 3.2000000000000005e-05, "loss": 0.9724, "step": 2100 }, { "epoch": 0.12, "learning_rate": 3.12e-05, "loss": 1.0209, "step": 2110 }, { "epoch": 0.12, "learning_rate": 3.04e-05, "loss": 0.9966, "step": 2120 }, { "epoch": 0.12, "learning_rate": 2.96e-05, "loss": 0.9792, "step": 2130 }, { "epoch": 0.12, "learning_rate": 2.88e-05, "loss": 1.0142, "step": 2140 }, { "epoch": 0.12, "learning_rate": 2.8000000000000003e-05, "loss": 0.9164, "step": 2150 }, { "epoch": 0.12, "learning_rate": 2.7200000000000004e-05, "loss": 1.0151, "step": 2160 }, { "epoch": 0.12, "learning_rate": 2.64e-05, "loss": 0.9667, "step": 2170 }, { "epoch": 0.13, "learning_rate": 2.5600000000000002e-05, "loss": 1.1009, "step": 2180 }, { "epoch": 0.13, "learning_rate": 2.48e-05, "loss": 0.9313, "step": 2190 }, { "epoch": 0.13, "learning_rate": 2.4e-05, "loss": 1.0617, "step": 2200 }, { "epoch": 0.13, "learning_rate": 2.32e-05, "loss": 1.0034, "step": 2210 }, { "epoch": 0.13, "learning_rate": 2.2400000000000002e-05, "loss": 0.9779, "step": 2220 }, { "epoch": 0.13, "learning_rate": 2.16e-05, "loss": 0.9861, "step": 2230 }, { "epoch": 0.13, "learning_rate": 2.08e-05, "loss": 0.9929, "step": 2240 }, { "epoch": 0.13, "learning_rate": 2e-05, "loss": 1.0136, "step": 2250 }, { "epoch": 0.13, "learning_rate": 1.9200000000000003e-05, "loss": 0.9733, "step": 2260 }, { "epoch": 0.13, "learning_rate": 1.84e-05, "loss": 0.9304, "step": 2270 }, { "epoch": 0.13, "learning_rate": 1.76e-05, "loss": 0.976, "step": 2280 }, { "epoch": 0.13, "learning_rate": 1.6800000000000002e-05, "loss": 1.0155, "step": 2290 }, { "epoch": 0.13, "learning_rate": 1.6000000000000003e-05, "loss": 0.9637, "step": 2300 }, { "epoch": 0.13, "learning_rate": 1.52e-05, "loss": 0.9841, "step": 2310 }, { "epoch": 0.13, "learning_rate": 1.44e-05, "loss": 0.9995, "step": 2320 }, { "epoch": 0.13, "learning_rate": 1.3600000000000002e-05, "loss": 0.944, "step": 2330 }, { "epoch": 0.13, "learning_rate": 1.2800000000000001e-05, "loss": 1.0196, "step": 2340 }, { "epoch": 0.14, "learning_rate": 1.2e-05, "loss": 1.1164, "step": 2350 }, { "epoch": 0.14, "learning_rate": 1.1200000000000001e-05, "loss": 0.9235, "step": 2360 }, { "epoch": 0.14, "learning_rate": 1.04e-05, "loss": 0.9698, "step": 2370 }, { "epoch": 0.14, "learning_rate": 9.600000000000001e-06, "loss": 1.0232, "step": 2380 }, { "epoch": 0.14, "learning_rate": 8.8e-06, "loss": 0.9899, "step": 2390 }, { "epoch": 0.14, "learning_rate": 8.000000000000001e-06, "loss": 0.9535, "step": 2400 }, { "epoch": 0.14, "learning_rate": 7.2e-06, "loss": 0.977, "step": 2410 }, { "epoch": 0.14, "learning_rate": 6.4000000000000006e-06, "loss": 1.0414, "step": 2420 }, { "epoch": 0.14, "learning_rate": 5.600000000000001e-06, "loss": 0.986, "step": 2430 }, { "epoch": 0.14, "learning_rate": 4.800000000000001e-06, "loss": 1.0086, "step": 2440 }, { "epoch": 0.14, "learning_rate": 4.000000000000001e-06, "loss": 0.9594, "step": 2450 }, { "epoch": 0.14, "learning_rate": 3.2000000000000003e-06, "loss": 0.9584, "step": 2460 }, { "epoch": 0.14, "learning_rate": 2.4000000000000003e-06, "loss": 0.8961, "step": 2470 }, { "epoch": 0.14, "learning_rate": 1.6000000000000001e-06, "loss": 0.9713, "step": 2480 }, { "epoch": 0.14, "learning_rate": 8.000000000000001e-07, "loss": 1.0307, "step": 2490 }, { "epoch": 0.14, "learning_rate": 0.0, "loss": 0.9984, "step": 2500 } ], "logging_steps": 10, "max_steps": 2500, "num_train_epochs": 1, "save_steps": 500, "total_flos": 1.63610722566144e+18, "trial_name": null, "trial_params": null }