{ "best_metric": 41.125, "best_model_checkpoint": "result/Chat-Llama-2-13b-chat-hf-mezo-ft-20000-16-1e-3-1e-1-0/checkpoint-4000", "epoch": 63.492063492063494, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 0.001, "loss": 31.3156, "step": 10 }, { "epoch": 0.32, "learning_rate": 0.001, "loss": 31.675, "step": 20 }, { "epoch": 0.48, "learning_rate": 0.001, "loss": 31.5031, "step": 30 }, { "epoch": 0.63, "learning_rate": 0.001, "loss": 31.4563, "step": 40 }, { "epoch": 0.79, "learning_rate": 0.001, "loss": 31.4125, "step": 50 }, { "epoch": 0.95, "learning_rate": 0.001, "loss": 31.7234, "step": 60 }, { "epoch": 1.11, "learning_rate": 0.001, "loss": 31.6281, "step": 70 }, { "epoch": 1.27, "learning_rate": 0.001, "loss": 31.6859, "step": 80 }, { "epoch": 1.43, "learning_rate": 0.001, "loss": 31.6109, "step": 90 }, { "epoch": 1.59, "learning_rate": 0.001, "loss": 31.7641, "step": 100 }, { "epoch": 1.75, "learning_rate": 0.001, "loss": 31.7063, "step": 110 }, { "epoch": 1.9, "learning_rate": 0.001, "loss": 31.85, "step": 120 }, { "epoch": 2.06, "learning_rate": 0.001, "loss": 31.975, "step": 130 }, { "epoch": 2.22, "learning_rate": 0.001, "loss": 31.9344, "step": 140 }, { "epoch": 2.38, "learning_rate": 0.001, "loss": 32.0484, "step": 150 }, { "epoch": 2.54, "learning_rate": 0.001, "loss": 32.0344, "step": 160 }, { "epoch": 2.7, "learning_rate": 0.001, "loss": 32.0703, "step": 170 }, { "epoch": 2.86, "learning_rate": 0.001, "loss": 32.0656, "step": 180 }, { "epoch": 3.02, "learning_rate": 0.001, "loss": 32.0734, "step": 190 }, { "epoch": 3.17, "learning_rate": 0.001, "loss": 32.2547, "step": 200 }, { "epoch": 3.33, "learning_rate": 0.001, "loss": 32.1437, "step": 210 }, { "epoch": 3.49, "learning_rate": 0.001, "loss": 32.3672, "step": 220 }, { "epoch": 3.65, "learning_rate": 0.001, "loss": 32.2766, "step": 230 }, { "epoch": 3.81, "learning_rate": 0.001, "loss": 32.3906, "step": 240 }, { "epoch": 3.97, "learning_rate": 0.001, "loss": 32.6156, "step": 250 }, { "epoch": 4.13, "learning_rate": 0.001, "loss": 32.5547, "step": 260 }, { "epoch": 4.29, "learning_rate": 0.001, "loss": 32.8094, "step": 270 }, { "epoch": 4.44, "learning_rate": 0.001, "loss": 32.7406, "step": 280 }, { "epoch": 4.6, "learning_rate": 0.001, "loss": 32.9031, "step": 290 }, { "epoch": 4.76, "learning_rate": 0.001, "loss": 32.8188, "step": 300 }, { "epoch": 4.92, "learning_rate": 0.001, "loss": 32.9562, "step": 310 }, { "epoch": 5.08, "learning_rate": 0.001, "loss": 32.9469, "step": 320 }, { "epoch": 5.24, "learning_rate": 0.001, "loss": 33.0625, "step": 330 }, { "epoch": 5.4, "learning_rate": 0.001, "loss": 33.0594, "step": 340 }, { "epoch": 5.56, "learning_rate": 0.001, "loss": 33.1031, "step": 350 }, { "epoch": 5.71, "learning_rate": 0.001, "loss": 33.2094, "step": 360 }, { "epoch": 5.87, "learning_rate": 0.001, "loss": 33.2906, "step": 370 }, { "epoch": 6.03, "learning_rate": 0.001, "loss": 33.3406, "step": 380 }, { "epoch": 6.19, "learning_rate": 0.001, "loss": 33.2656, "step": 390 }, { "epoch": 6.35, "learning_rate": 0.001, "loss": 33.3719, "step": 400 }, { "epoch": 6.51, "learning_rate": 0.001, "loss": 33.4844, "step": 410 }, { "epoch": 6.67, "learning_rate": 0.001, "loss": 33.4625, "step": 420 }, { "epoch": 6.83, "learning_rate": 0.001, "loss": 33.4438, "step": 430 }, { "epoch": 6.98, "learning_rate": 0.001, "loss": 33.5844, "step": 440 }, { "epoch": 7.14, "learning_rate": 0.001, "loss": 33.6281, "step": 450 }, { "epoch": 7.3, "learning_rate": 0.001, "loss": 33.7062, "step": 460 }, { "epoch": 7.46, "learning_rate": 0.001, "loss": 33.6719, "step": 470 }, { "epoch": 7.62, "learning_rate": 0.001, "loss": 33.7156, "step": 480 }, { "epoch": 7.78, "learning_rate": 0.001, "loss": 33.75, "step": 490 }, { "epoch": 7.94, "learning_rate": 0.001, "loss": 33.8344, "step": 500 }, { "epoch": 8.1, "learning_rate": 0.001, "loss": 33.725, "step": 510 }, { "epoch": 8.25, "learning_rate": 0.001, "loss": 33.9688, "step": 520 }, { "epoch": 8.41, "learning_rate": 0.001, "loss": 33.9344, "step": 530 }, { "epoch": 8.57, "learning_rate": 0.001, "loss": 34.1, "step": 540 }, { "epoch": 8.73, "learning_rate": 0.001, "loss": 34.0719, "step": 550 }, { "epoch": 8.89, "learning_rate": 0.001, "loss": 34.2563, "step": 560 }, { "epoch": 9.05, "learning_rate": 0.001, "loss": 34.0438, "step": 570 }, { "epoch": 9.21, "learning_rate": 0.001, "loss": 34.3094, "step": 580 }, { "epoch": 9.37, "learning_rate": 0.001, "loss": 34.3687, "step": 590 }, { "epoch": 9.52, "learning_rate": 0.001, "loss": 34.4406, "step": 600 }, { "epoch": 9.68, "learning_rate": 0.001, "loss": 34.3531, "step": 610 }, { "epoch": 9.84, "learning_rate": 0.001, "loss": 34.3344, "step": 620 }, { "epoch": 10.0, "learning_rate": 0.001, "loss": 34.4125, "step": 630 }, { "epoch": 10.16, "learning_rate": 0.001, "loss": 34.4, "step": 640 }, { "epoch": 10.32, "learning_rate": 0.001, "loss": 34.4906, "step": 650 }, { "epoch": 10.48, "learning_rate": 0.001, "loss": 34.5125, "step": 660 }, { "epoch": 10.63, "learning_rate": 0.001, "loss": 34.5875, "step": 670 }, { "epoch": 10.79, "learning_rate": 0.001, "loss": 34.7281, "step": 680 }, { "epoch": 10.95, "learning_rate": 0.001, "loss": 34.825, "step": 690 }, { "epoch": 11.11, "learning_rate": 0.001, "loss": 34.7437, "step": 700 }, { "epoch": 11.27, "learning_rate": 0.001, "loss": 34.6531, "step": 710 }, { "epoch": 11.43, "learning_rate": 0.001, "loss": 34.8531, "step": 720 }, { "epoch": 11.59, "learning_rate": 0.001, "loss": 34.8438, "step": 730 }, { "epoch": 11.75, "learning_rate": 0.001, "loss": 35.0, "step": 740 }, { "epoch": 11.9, "learning_rate": 0.001, "loss": 34.9562, "step": 750 }, { "epoch": 12.06, "learning_rate": 0.001, "loss": 34.9781, "step": 760 }, { "epoch": 12.22, "learning_rate": 0.001, "loss": 35.0625, "step": 770 }, { "epoch": 12.38, "learning_rate": 0.001, "loss": 35.1063, "step": 780 }, { "epoch": 12.54, "learning_rate": 0.001, "loss": 35.1594, "step": 790 }, { "epoch": 12.7, "learning_rate": 0.001, "loss": 35.2, "step": 800 }, { "epoch": 12.86, "learning_rate": 0.001, "loss": 35.1281, "step": 810 }, { "epoch": 13.02, "learning_rate": 0.001, "loss": 35.3563, "step": 820 }, { "epoch": 13.17, "learning_rate": 0.001, "loss": 35.1688, "step": 830 }, { "epoch": 13.33, "learning_rate": 0.001, "loss": 35.3625, "step": 840 }, { "epoch": 13.49, "learning_rate": 0.001, "loss": 35.5438, "step": 850 }, { "epoch": 13.65, "learning_rate": 0.001, "loss": 35.3937, "step": 860 }, { "epoch": 13.81, "learning_rate": 0.001, "loss": 35.3937, "step": 870 }, { "epoch": 13.97, "learning_rate": 0.001, "loss": 35.5938, "step": 880 }, { "epoch": 14.13, "learning_rate": 0.001, "loss": 35.3781, "step": 890 }, { "epoch": 14.29, "learning_rate": 0.001, "loss": 35.775, "step": 900 }, { "epoch": 14.44, "learning_rate": 0.001, "loss": 35.7594, "step": 910 }, { "epoch": 14.6, "learning_rate": 0.001, "loss": 35.5438, "step": 920 }, { "epoch": 14.76, "learning_rate": 0.001, "loss": 35.8406, "step": 930 }, { "epoch": 14.92, "learning_rate": 0.001, "loss": 35.6, "step": 940 }, { "epoch": 15.08, "learning_rate": 0.001, "loss": 35.8844, "step": 950 }, { "epoch": 15.24, "learning_rate": 0.001, "loss": 35.9594, "step": 960 }, { "epoch": 15.4, "learning_rate": 0.001, "loss": 35.8281, "step": 970 }, { "epoch": 15.56, "learning_rate": 0.001, "loss": 36.0125, "step": 980 }, { "epoch": 15.71, "learning_rate": 0.001, "loss": 36.1313, "step": 990 }, { "epoch": 15.87, "learning_rate": 0.001, "loss": 36.0656, "step": 1000 }, { "epoch": 16.03, "learning_rate": 0.001, "loss": 36.1938, "step": 1010 }, { "epoch": 16.19, "learning_rate": 0.001, "loss": 36.2031, "step": 1020 }, { "epoch": 16.35, "learning_rate": 0.001, "loss": 36.1688, "step": 1030 }, { "epoch": 16.51, "learning_rate": 0.001, "loss": 36.3156, "step": 1040 }, { "epoch": 16.67, "learning_rate": 0.001, "loss": 36.2719, "step": 1050 }, { "epoch": 16.83, "learning_rate": 0.001, "loss": 36.4656, "step": 1060 }, { "epoch": 16.98, "learning_rate": 0.001, "loss": 36.3687, "step": 1070 }, { "epoch": 17.14, "learning_rate": 0.001, "loss": 36.4031, "step": 1080 }, { "epoch": 17.3, "learning_rate": 0.001, "loss": 36.5469, "step": 1090 }, { "epoch": 17.46, "learning_rate": 0.001, "loss": 36.5094, "step": 1100 }, { "epoch": 17.62, "learning_rate": 0.001, "loss": 36.6156, "step": 1110 }, { "epoch": 17.78, "learning_rate": 0.001, "loss": 36.5531, "step": 1120 }, { "epoch": 17.94, "learning_rate": 0.001, "loss": 36.525, "step": 1130 }, { "epoch": 18.1, "learning_rate": 0.001, "loss": 36.6562, "step": 1140 }, { "epoch": 18.25, "learning_rate": 0.001, "loss": 36.5812, "step": 1150 }, { "epoch": 18.41, "learning_rate": 0.001, "loss": 36.6625, "step": 1160 }, { "epoch": 18.57, "learning_rate": 0.001, "loss": 36.625, "step": 1170 }, { "epoch": 18.73, "learning_rate": 0.001, "loss": 36.8719, "step": 1180 }, { "epoch": 18.89, "learning_rate": 0.001, "loss": 36.9219, "step": 1190 }, { "epoch": 19.05, "learning_rate": 0.001, "loss": 36.8875, "step": 1200 }, { "epoch": 19.21, "learning_rate": 0.001, "loss": 36.7344, "step": 1210 }, { "epoch": 19.37, "learning_rate": 0.001, "loss": 36.9562, "step": 1220 }, { "epoch": 19.52, "learning_rate": 0.001, "loss": 37.1688, "step": 1230 }, { "epoch": 19.68, "learning_rate": 0.001, "loss": 37.2156, "step": 1240 }, { "epoch": 19.84, "learning_rate": 0.001, "loss": 37.1719, "step": 1250 }, { "epoch": 20.0, "learning_rate": 0.001, "loss": 37.2594, "step": 1260 }, { "epoch": 20.16, "learning_rate": 0.001, "loss": 37.4062, "step": 1270 }, { "epoch": 20.32, "learning_rate": 0.001, "loss": 37.4937, "step": 1280 }, { "epoch": 20.48, "learning_rate": 0.001, "loss": 37.5625, "step": 1290 }, { "epoch": 20.63, "learning_rate": 0.001, "loss": 37.6, "step": 1300 }, { "epoch": 20.79, "learning_rate": 0.001, "loss": 37.6156, "step": 1310 }, { "epoch": 20.95, "learning_rate": 0.001, "loss": 37.625, "step": 1320 }, { "epoch": 21.11, "learning_rate": 0.001, "loss": 37.5906, "step": 1330 }, { "epoch": 21.27, "learning_rate": 0.001, "loss": 37.8344, "step": 1340 }, { "epoch": 21.43, "learning_rate": 0.001, "loss": 37.7656, "step": 1350 }, { "epoch": 21.59, "learning_rate": 0.001, "loss": 37.8875, "step": 1360 }, { "epoch": 21.75, "learning_rate": 0.001, "loss": 37.6437, "step": 1370 }, { "epoch": 21.9, "learning_rate": 0.001, "loss": 38.0063, "step": 1380 }, { "epoch": 22.06, "learning_rate": 0.001, "loss": 38.0594, "step": 1390 }, { "epoch": 22.22, "learning_rate": 0.001, "loss": 37.9656, "step": 1400 }, { "epoch": 22.38, "learning_rate": 0.001, "loss": 38.125, "step": 1410 }, { "epoch": 22.54, "learning_rate": 0.001, "loss": 38.1469, "step": 1420 }, { "epoch": 22.7, "learning_rate": 0.001, "loss": 38.2188, "step": 1430 }, { "epoch": 22.86, "learning_rate": 0.001, "loss": 38.0969, "step": 1440 }, { "epoch": 23.02, "learning_rate": 0.001, "loss": 38.3469, "step": 1450 }, { "epoch": 23.17, "learning_rate": 0.001, "loss": 38.3563, "step": 1460 }, { "epoch": 23.33, "learning_rate": 0.001, "loss": 38.3719, "step": 1470 }, { "epoch": 23.49, "learning_rate": 0.001, "loss": 38.3375, "step": 1480 }, { "epoch": 23.65, "learning_rate": 0.001, "loss": 38.5375, "step": 1490 }, { "epoch": 23.81, "learning_rate": 0.001, "loss": 38.4531, "step": 1500 }, { "epoch": 23.97, "learning_rate": 0.001, "loss": 38.4469, "step": 1510 }, { "epoch": 24.13, "learning_rate": 0.001, "loss": 38.4375, "step": 1520 }, { "epoch": 24.29, "learning_rate": 0.001, "loss": 38.6406, "step": 1530 }, { "epoch": 24.44, "learning_rate": 0.001, "loss": 38.7156, "step": 1540 }, { "epoch": 24.6, "learning_rate": 0.001, "loss": 38.7719, "step": 1550 }, { "epoch": 24.76, "learning_rate": 0.001, "loss": 38.7313, "step": 1560 }, { "epoch": 24.92, "learning_rate": 0.001, "loss": 38.9125, "step": 1570 }, { "epoch": 25.08, "learning_rate": 0.001, "loss": 38.8281, "step": 1580 }, { "epoch": 25.24, "learning_rate": 0.001, "loss": 38.9062, "step": 1590 }, { "epoch": 25.4, "learning_rate": 0.001, "loss": 39.025, "step": 1600 }, { "epoch": 25.56, "learning_rate": 0.001, "loss": 38.9125, "step": 1610 }, { "epoch": 25.71, "learning_rate": 0.001, "loss": 39.0594, "step": 1620 }, { "epoch": 25.87, "learning_rate": 0.001, "loss": 38.9625, "step": 1630 }, { "epoch": 26.03, "learning_rate": 0.001, "loss": 39.1187, "step": 1640 }, { "epoch": 26.19, "learning_rate": 0.001, "loss": 39.1, "step": 1650 }, { "epoch": 26.35, "learning_rate": 0.001, "loss": 39.1313, "step": 1660 }, { "epoch": 26.51, "learning_rate": 0.001, "loss": 39.2906, "step": 1670 }, { "epoch": 26.67, "learning_rate": 0.001, "loss": 39.1719, "step": 1680 }, { "epoch": 26.83, "learning_rate": 0.001, "loss": 39.2687, "step": 1690 }, { "epoch": 26.98, "learning_rate": 0.001, "loss": 39.4312, "step": 1700 }, { "epoch": 27.14, "learning_rate": 0.001, "loss": 39.2563, "step": 1710 }, { "epoch": 27.3, "learning_rate": 0.001, "loss": 39.3344, "step": 1720 }, { "epoch": 27.46, "learning_rate": 0.001, "loss": 39.375, "step": 1730 }, { "epoch": 27.62, "learning_rate": 0.001, "loss": 39.4031, "step": 1740 }, { "epoch": 27.78, "learning_rate": 0.001, "loss": 39.4969, "step": 1750 }, { "epoch": 27.94, "learning_rate": 0.001, "loss": 39.5219, "step": 1760 }, { "epoch": 28.1, "learning_rate": 0.001, "loss": 39.5719, "step": 1770 }, { "epoch": 28.25, "learning_rate": 0.001, "loss": 39.6719, "step": 1780 }, { "epoch": 28.41, "learning_rate": 0.001, "loss": 39.7156, "step": 1790 }, { "epoch": 28.57, "learning_rate": 0.001, "loss": 39.8312, "step": 1800 }, { "epoch": 28.73, "learning_rate": 0.001, "loss": 39.8687, "step": 1810 }, { "epoch": 28.89, "learning_rate": 0.001, "loss": 39.9906, "step": 1820 }, { "epoch": 29.05, "learning_rate": 0.001, "loss": 40.0156, "step": 1830 }, { "epoch": 29.21, "learning_rate": 0.001, "loss": 40.1469, "step": 1840 }, { "epoch": 29.37, "learning_rate": 0.001, "loss": 40.1938, "step": 1850 }, { "epoch": 29.52, "learning_rate": 0.001, "loss": 40.1594, "step": 1860 }, { "epoch": 29.68, "learning_rate": 0.001, "loss": 39.9094, "step": 1870 }, { "epoch": 29.84, "learning_rate": 0.001, "loss": 40.1625, "step": 1880 }, { "epoch": 30.0, "learning_rate": 0.001, "loss": 40.1906, "step": 1890 }, { "epoch": 30.16, "learning_rate": 0.001, "loss": 40.3406, "step": 1900 }, { "epoch": 30.32, "learning_rate": 0.001, "loss": 40.2812, "step": 1910 }, { "epoch": 30.48, "learning_rate": 0.001, "loss": 40.2969, "step": 1920 }, { "epoch": 30.63, "learning_rate": 0.001, "loss": 40.2469, "step": 1930 }, { "epoch": 30.79, "learning_rate": 0.001, "loss": 40.3656, "step": 1940 }, { "epoch": 30.95, "learning_rate": 0.001, "loss": 40.4156, "step": 1950 }, { "epoch": 31.11, "learning_rate": 0.001, "loss": 40.4844, "step": 1960 }, { "epoch": 31.27, "learning_rate": 0.001, "loss": 40.4531, "step": 1970 }, { "epoch": 31.43, "learning_rate": 0.001, "loss": 40.6187, "step": 1980 }, { "epoch": 31.59, "learning_rate": 0.001, "loss": 40.8844, "step": 1990 }, { "epoch": 31.75, "learning_rate": 0.001, "loss": 40.8125, "step": 2000 }, { "epoch": 31.9, "learning_rate": 0.001, "loss": 40.9562, "step": 2010 }, { "epoch": 32.06, "learning_rate": 0.001, "loss": 40.8312, "step": 2020 }, { "epoch": 32.22, "learning_rate": 0.001, "loss": 40.825, "step": 2030 }, { "epoch": 32.38, "learning_rate": 0.001, "loss": 41.0594, "step": 2040 }, { "epoch": 32.54, "learning_rate": 0.001, "loss": 40.8875, "step": 2050 }, { "epoch": 32.7, "learning_rate": 0.001, "loss": 40.8062, "step": 2060 }, { "epoch": 32.86, "learning_rate": 0.001, "loss": 41.0844, "step": 2070 }, { "epoch": 33.02, "learning_rate": 0.001, "loss": 41.1187, "step": 2080 }, { "epoch": 33.17, "learning_rate": 0.001, "loss": 41.1031, "step": 2090 }, { "epoch": 33.33, "learning_rate": 0.001, "loss": 41.2344, "step": 2100 }, { "epoch": 33.49, "learning_rate": 0.001, "loss": 41.175, "step": 2110 }, { "epoch": 33.65, "learning_rate": 0.001, "loss": 41.4813, "step": 2120 }, { "epoch": 33.81, "learning_rate": 0.001, "loss": 41.4031, "step": 2130 }, { "epoch": 33.97, "learning_rate": 0.001, "loss": 41.2875, "step": 2140 }, { "epoch": 34.13, "learning_rate": 0.001, "loss": 41.6031, "step": 2150 }, { "epoch": 34.29, "learning_rate": 0.001, "loss": 41.3781, "step": 2160 }, { "epoch": 34.44, "learning_rate": 0.001, "loss": 41.7094, "step": 2170 }, { "epoch": 34.6, "learning_rate": 0.001, "loss": 41.6, "step": 2180 }, { "epoch": 34.76, "learning_rate": 0.001, "loss": 41.6313, "step": 2190 }, { "epoch": 34.92, "learning_rate": 0.001, "loss": 41.7125, "step": 2200 }, { "epoch": 35.08, "learning_rate": 0.001, "loss": 41.7625, "step": 2210 }, { "epoch": 35.24, "learning_rate": 0.001, "loss": 41.8656, "step": 2220 }, { "epoch": 35.4, "learning_rate": 0.001, "loss": 41.8531, "step": 2230 }, { "epoch": 35.56, "learning_rate": 0.001, "loss": 41.9312, "step": 2240 }, { "epoch": 35.71, "learning_rate": 0.001, "loss": 41.9406, "step": 2250 }, { "epoch": 35.87, "learning_rate": 0.001, "loss": 42.0187, "step": 2260 }, { "epoch": 36.03, "learning_rate": 0.001, "loss": 42.2437, "step": 2270 }, { "epoch": 36.19, "learning_rate": 0.001, "loss": 42.15, "step": 2280 }, { "epoch": 36.35, "learning_rate": 0.001, "loss": 42.2094, "step": 2290 }, { "epoch": 36.51, "learning_rate": 0.001, "loss": 42.2344, "step": 2300 }, { "epoch": 36.67, "learning_rate": 0.001, "loss": 42.4469, "step": 2310 }, { "epoch": 36.83, "learning_rate": 0.001, "loss": 42.3031, "step": 2320 }, { "epoch": 36.98, "learning_rate": 0.001, "loss": 42.3625, "step": 2330 }, { "epoch": 37.14, "learning_rate": 0.001, "loss": 42.7094, "step": 2340 }, { "epoch": 37.3, "learning_rate": 0.001, "loss": 42.6344, "step": 2350 }, { "epoch": 37.46, "learning_rate": 0.001, "loss": 42.6531, "step": 2360 }, { "epoch": 37.62, "learning_rate": 0.001, "loss": 42.6063, "step": 2370 }, { "epoch": 37.78, "learning_rate": 0.001, "loss": 42.6313, "step": 2380 }, { "epoch": 37.94, "learning_rate": 0.001, "loss": 42.7375, "step": 2390 }, { "epoch": 38.1, "learning_rate": 0.001, "loss": 42.7563, "step": 2400 }, { "epoch": 38.25, "learning_rate": 0.001, "loss": 42.7812, "step": 2410 }, { "epoch": 38.41, "learning_rate": 0.001, "loss": 42.7969, "step": 2420 }, { "epoch": 38.57, "learning_rate": 0.001, "loss": 42.8375, "step": 2430 }, { "epoch": 38.73, "learning_rate": 0.001, "loss": 43.0281, "step": 2440 }, { "epoch": 38.89, "learning_rate": 0.001, "loss": 42.9031, "step": 2450 }, { "epoch": 39.05, "learning_rate": 0.001, "loss": 42.9969, "step": 2460 }, { "epoch": 39.21, "learning_rate": 0.001, "loss": 43.075, "step": 2470 }, { "epoch": 39.37, "learning_rate": 0.001, "loss": 43.075, "step": 2480 }, { "epoch": 39.52, "learning_rate": 0.001, "loss": 43.0812, "step": 2490 }, { "epoch": 39.68, "learning_rate": 0.001, "loss": 43.2406, "step": 2500 }, { "epoch": 39.84, "learning_rate": 0.001, "loss": 43.1375, "step": 2510 }, { "epoch": 40.0, "learning_rate": 0.001, "loss": 43.1625, "step": 2520 }, { "epoch": 40.16, "learning_rate": 0.001, "loss": 43.2313, "step": 2530 }, { "epoch": 40.32, "learning_rate": 0.001, "loss": 43.4375, "step": 2540 }, { "epoch": 40.48, "learning_rate": 0.001, "loss": 43.4719, "step": 2550 }, { "epoch": 40.63, "learning_rate": 0.001, "loss": 43.3969, "step": 2560 }, { "epoch": 40.79, "learning_rate": 0.001, "loss": 43.675, "step": 2570 }, { "epoch": 40.95, "learning_rate": 0.001, "loss": 43.5562, "step": 2580 }, { "epoch": 41.11, "learning_rate": 0.001, "loss": 43.6844, "step": 2590 }, { "epoch": 41.27, "learning_rate": 0.001, "loss": 43.8094, "step": 2600 }, { "epoch": 41.43, "learning_rate": 0.001, "loss": 43.8656, "step": 2610 }, { "epoch": 41.59, "learning_rate": 0.001, "loss": 43.9969, "step": 2620 }, { "epoch": 41.75, "learning_rate": 0.001, "loss": 43.8469, "step": 2630 }, { "epoch": 41.9, "learning_rate": 0.001, "loss": 44.0688, "step": 2640 }, { "epoch": 42.06, "learning_rate": 0.001, "loss": 43.9531, "step": 2650 }, { "epoch": 42.22, "learning_rate": 0.001, "loss": 43.9937, "step": 2660 }, { "epoch": 42.38, "learning_rate": 0.001, "loss": 44.1125, "step": 2670 }, { "epoch": 42.54, "learning_rate": 0.001, "loss": 44.1969, "step": 2680 }, { "epoch": 42.7, "learning_rate": 0.001, "loss": 44.1781, "step": 2690 }, { "epoch": 42.86, "learning_rate": 0.001, "loss": 44.3781, "step": 2700 }, { "epoch": 43.02, "learning_rate": 0.001, "loss": 44.3125, "step": 2710 }, { "epoch": 43.17, "learning_rate": 0.001, "loss": 44.2875, "step": 2720 }, { "epoch": 43.33, "learning_rate": 0.001, "loss": 44.3344, "step": 2730 }, { "epoch": 43.49, "learning_rate": 0.001, "loss": 44.3188, "step": 2740 }, { "epoch": 43.65, "learning_rate": 0.001, "loss": 44.3281, "step": 2750 }, { "epoch": 43.81, "learning_rate": 0.001, "loss": 44.5063, "step": 2760 }, { "epoch": 43.97, "learning_rate": 0.001, "loss": 44.5625, "step": 2770 }, { "epoch": 44.13, "learning_rate": 0.001, "loss": 44.2313, "step": 2780 }, { "epoch": 44.29, "learning_rate": 0.001, "loss": 44.7313, "step": 2790 }, { "epoch": 44.44, "learning_rate": 0.001, "loss": 44.6125, "step": 2800 }, { "epoch": 44.6, "learning_rate": 0.001, "loss": 44.7125, "step": 2810 }, { "epoch": 44.76, "learning_rate": 0.001, "loss": 44.9594, "step": 2820 }, { "epoch": 44.92, "learning_rate": 0.001, "loss": 44.975, "step": 2830 }, { "epoch": 45.08, "learning_rate": 0.001, "loss": 44.9531, "step": 2840 }, { "epoch": 45.24, "learning_rate": 0.001, "loss": 44.9406, "step": 2850 }, { "epoch": 45.4, "learning_rate": 0.001, "loss": 45.0812, "step": 2860 }, { "epoch": 45.56, "learning_rate": 0.001, "loss": 45.1969, "step": 2870 }, { "epoch": 45.71, "learning_rate": 0.001, "loss": 44.9125, "step": 2880 }, { "epoch": 45.87, "learning_rate": 0.001, "loss": 45.0312, "step": 2890 }, { "epoch": 46.03, "learning_rate": 0.001, "loss": 45.1406, "step": 2900 }, { "epoch": 46.19, "learning_rate": 0.001, "loss": 45.3125, "step": 2910 }, { "epoch": 46.35, "learning_rate": 0.001, "loss": 45.4719, "step": 2920 }, { "epoch": 46.51, "learning_rate": 0.001, "loss": 45.3312, "step": 2930 }, { "epoch": 46.67, "learning_rate": 0.001, "loss": 45.5156, "step": 2940 }, { "epoch": 46.83, "learning_rate": 0.001, "loss": 45.5406, "step": 2950 }, { "epoch": 46.98, "learning_rate": 0.001, "loss": 45.6063, "step": 2960 }, { "epoch": 47.14, "learning_rate": 0.001, "loss": 45.5781, "step": 2970 }, { "epoch": 47.3, "learning_rate": 0.001, "loss": 45.8438, "step": 2980 }, { "epoch": 47.46, "learning_rate": 0.001, "loss": 45.8813, "step": 2990 }, { "epoch": 47.62, "learning_rate": 0.001, "loss": 46.0375, "step": 3000 }, { "epoch": 47.78, "learning_rate": 0.001, "loss": 45.7313, "step": 3010 }, { "epoch": 47.94, "learning_rate": 0.001, "loss": 45.7844, "step": 3020 }, { "epoch": 48.1, "learning_rate": 0.001, "loss": 46.0875, "step": 3030 }, { "epoch": 48.25, "learning_rate": 0.001, "loss": 45.8438, "step": 3040 }, { "epoch": 48.41, "learning_rate": 0.001, "loss": 45.9375, "step": 3050 }, { "epoch": 48.57, "learning_rate": 0.001, "loss": 46.0656, "step": 3060 }, { "epoch": 48.73, "learning_rate": 0.001, "loss": 46.0469, "step": 3070 }, { "epoch": 48.89, "learning_rate": 0.001, "loss": 46.325, "step": 3080 }, { "epoch": 49.05, "learning_rate": 0.001, "loss": 46.2125, "step": 3090 }, { "epoch": 49.21, "learning_rate": 0.001, "loss": 46.3438, "step": 3100 }, { "epoch": 49.37, "learning_rate": 0.001, "loss": 46.4562, "step": 3110 }, { "epoch": 49.52, "learning_rate": 0.001, "loss": 46.3875, "step": 3120 }, { "epoch": 49.68, "learning_rate": 0.001, "loss": 46.1625, "step": 3130 }, { "epoch": 49.84, "learning_rate": 0.001, "loss": 46.7, "step": 3140 }, { "epoch": 50.0, "learning_rate": 0.001, "loss": 46.4406, "step": 3150 }, { "epoch": 50.16, "learning_rate": 0.001, "loss": 46.5906, "step": 3160 }, { "epoch": 50.32, "learning_rate": 0.001, "loss": 46.6156, "step": 3170 }, { "epoch": 50.48, "learning_rate": 0.001, "loss": 46.6187, "step": 3180 }, { "epoch": 50.63, "learning_rate": 0.001, "loss": 46.8219, "step": 3190 }, { "epoch": 50.79, "learning_rate": 0.001, "loss": 46.7281, "step": 3200 }, { "epoch": 50.95, "learning_rate": 0.001, "loss": 46.6469, "step": 3210 }, { "epoch": 51.11, "learning_rate": 0.001, "loss": 46.7031, "step": 3220 }, { "epoch": 51.27, "learning_rate": 0.001, "loss": 46.8312, "step": 3230 }, { "epoch": 51.43, "learning_rate": 0.001, "loss": 47.0844, "step": 3240 }, { "epoch": 51.59, "learning_rate": 0.001, "loss": 46.9156, "step": 3250 }, { "epoch": 51.75, "learning_rate": 0.001, "loss": 46.9813, "step": 3260 }, { "epoch": 51.9, "learning_rate": 0.001, "loss": 47.3062, "step": 3270 }, { "epoch": 52.06, "learning_rate": 0.001, "loss": 47.2313, "step": 3280 }, { "epoch": 52.22, "learning_rate": 0.001, "loss": 47.2656, "step": 3290 }, { "epoch": 52.38, "learning_rate": 0.001, "loss": 47.2031, "step": 3300 }, { "epoch": 52.54, "learning_rate": 0.001, "loss": 47.2812, "step": 3310 }, { "epoch": 52.7, "learning_rate": 0.001, "loss": 47.2281, "step": 3320 }, { "epoch": 52.86, "learning_rate": 0.001, "loss": 47.2906, "step": 3330 }, { "epoch": 53.02, "learning_rate": 0.001, "loss": 47.4469, "step": 3340 }, { "epoch": 53.17, "learning_rate": 0.001, "loss": 47.3719, "step": 3350 }, { "epoch": 53.33, "learning_rate": 0.001, "loss": 47.4813, "step": 3360 }, { "epoch": 53.49, "learning_rate": 0.001, "loss": 47.5906, "step": 3370 }, { "epoch": 53.65, "learning_rate": 0.001, "loss": 47.5719, "step": 3380 }, { "epoch": 53.81, "learning_rate": 0.001, "loss": 47.5938, "step": 3390 }, { "epoch": 53.97, "learning_rate": 0.001, "loss": 47.5906, "step": 3400 }, { "epoch": 54.13, "learning_rate": 0.001, "loss": 47.8594, "step": 3410 }, { "epoch": 54.29, "learning_rate": 0.001, "loss": 47.7969, "step": 3420 }, { "epoch": 54.44, "learning_rate": 0.001, "loss": 47.7844, "step": 3430 }, { "epoch": 54.6, "learning_rate": 0.001, "loss": 47.7656, "step": 3440 }, { "epoch": 54.76, "learning_rate": 0.001, "loss": 48.0594, "step": 3450 }, { "epoch": 54.92, "learning_rate": 0.001, "loss": 47.8937, "step": 3460 }, { "epoch": 55.08, "learning_rate": 0.001, "loss": 48.0063, "step": 3470 }, { "epoch": 55.24, "learning_rate": 0.001, "loss": 47.8875, "step": 3480 }, { "epoch": 55.4, "learning_rate": 0.001, "loss": 47.9594, "step": 3490 }, { "epoch": 55.56, "learning_rate": 0.001, "loss": 48.0375, "step": 3500 }, { "epoch": 55.71, "learning_rate": 0.001, "loss": 47.9625, "step": 3510 }, { "epoch": 55.87, "learning_rate": 0.001, "loss": 48.2219, "step": 3520 }, { "epoch": 56.03, "learning_rate": 0.001, "loss": 48.2938, "step": 3530 }, { "epoch": 56.19, "learning_rate": 0.001, "loss": 48.3875, "step": 3540 }, { "epoch": 56.35, "learning_rate": 0.001, "loss": 48.3156, "step": 3550 }, { "epoch": 56.51, "learning_rate": 0.001, "loss": 48.4281, "step": 3560 }, { "epoch": 56.67, "learning_rate": 0.001, "loss": 48.3813, "step": 3570 }, { "epoch": 56.83, "learning_rate": 0.001, "loss": 48.6594, "step": 3580 }, { "epoch": 56.98, "learning_rate": 0.001, "loss": 48.5344, "step": 3590 }, { "epoch": 57.14, "learning_rate": 0.001, "loss": 48.4781, "step": 3600 }, { "epoch": 57.3, "learning_rate": 0.001, "loss": 48.6469, "step": 3610 }, { "epoch": 57.46, "learning_rate": 0.001, "loss": 48.7406, "step": 3620 }, { "epoch": 57.62, "learning_rate": 0.001, "loss": 48.8312, "step": 3630 }, { "epoch": 57.78, "learning_rate": 0.001, "loss": 48.7844, "step": 3640 }, { "epoch": 57.94, "learning_rate": 0.001, "loss": 48.9906, "step": 3650 }, { "epoch": 58.1, "learning_rate": 0.001, "loss": 48.7906, "step": 3660 }, { "epoch": 58.25, "learning_rate": 0.001, "loss": 49.2281, "step": 3670 }, { "epoch": 58.41, "learning_rate": 0.001, "loss": 49.2406, "step": 3680 }, { "epoch": 58.57, "learning_rate": 0.001, "loss": 48.9844, "step": 3690 }, { "epoch": 58.73, "learning_rate": 0.001, "loss": 49.0469, "step": 3700 }, { "epoch": 58.89, "learning_rate": 0.001, "loss": 49.3438, "step": 3710 }, { "epoch": 59.05, "learning_rate": 0.001, "loss": 49.2219, "step": 3720 }, { "epoch": 59.21, "learning_rate": 0.001, "loss": 49.3219, "step": 3730 }, { "epoch": 59.37, "learning_rate": 0.001, "loss": 49.4906, "step": 3740 }, { "epoch": 59.52, "learning_rate": 0.001, "loss": 49.2281, "step": 3750 }, { "epoch": 59.68, "learning_rate": 0.001, "loss": 49.4375, "step": 3760 }, { "epoch": 59.84, "learning_rate": 0.001, "loss": 49.4844, "step": 3770 }, { "epoch": 60.0, "learning_rate": 0.001, "loss": 49.4969, "step": 3780 }, { "epoch": 60.16, "learning_rate": 0.001, "loss": 49.475, "step": 3790 }, { "epoch": 60.32, "learning_rate": 0.001, "loss": 49.7406, "step": 3800 }, { "epoch": 60.48, "learning_rate": 0.001, "loss": 49.7406, "step": 3810 }, { "epoch": 60.63, "learning_rate": 0.001, "loss": 49.7687, "step": 3820 }, { "epoch": 60.79, "learning_rate": 0.001, "loss": 49.6281, "step": 3830 }, { "epoch": 60.95, "learning_rate": 0.001, "loss": 49.8312, "step": 3840 }, { "epoch": 61.11, "learning_rate": 0.001, "loss": 50.1094, "step": 3850 }, { "epoch": 61.27, "learning_rate": 0.001, "loss": 49.9156, "step": 3860 }, { "epoch": 61.43, "learning_rate": 0.001, "loss": 49.7563, "step": 3870 }, { "epoch": 61.59, "learning_rate": 0.001, "loss": 49.8719, "step": 3880 }, { "epoch": 61.75, "learning_rate": 0.001, "loss": 50.1219, "step": 3890 }, { "epoch": 61.9, "learning_rate": 0.001, "loss": 50.2469, "step": 3900 }, { "epoch": 62.06, "learning_rate": 0.001, "loss": 50.0844, "step": 3910 }, { "epoch": 62.22, "learning_rate": 0.001, "loss": 50.2719, "step": 3920 }, { "epoch": 62.38, "learning_rate": 0.001, "loss": 50.0219, "step": 3930 }, { "epoch": 62.54, "learning_rate": 0.001, "loss": 50.3875, "step": 3940 }, { "epoch": 62.7, "learning_rate": 0.001, "loss": 50.3594, "step": 3950 }, { "epoch": 62.86, "learning_rate": 0.001, "loss": 50.4531, "step": 3960 }, { "epoch": 63.02, "learning_rate": 0.001, "loss": 50.6, "step": 3970 }, { "epoch": 63.17, "learning_rate": 0.001, "loss": 50.5063, "step": 3980 }, { "epoch": 63.33, "learning_rate": 0.001, "loss": 50.6125, "step": 3990 }, { "epoch": 63.49, "learning_rate": 0.001, "loss": 50.6313, "step": 4000 }, { "epoch": 63.49, "eval_loss": 41.125, "eval_runtime": 97.8561, "eval_samples_per_second": 5.11, "eval_steps_per_second": 0.644, "step": 4000 } ], "max_steps": 20000, "num_train_epochs": 318, "total_flos": 4.897803240892662e+18, "trial_name": null, "trial_params": null }