{
  "best_metric": 41.125,
  "best_model_checkpoint": "result/Chat-Llama-2-13b-chat-hf-mezo-ft-20000-16-1e-3-1e-1-0/checkpoint-4000",
  "epoch": 63.492063492063494,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16,
      "learning_rate": 0.001,
      "loss": 31.3156,
      "step": 10
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.001,
      "loss": 31.675,
      "step": 20
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.001,
      "loss": 31.5031,
      "step": 30
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.001,
      "loss": 31.4563,
      "step": 40
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.001,
      "loss": 31.4125,
      "step": 50
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.001,
      "loss": 31.7234,
      "step": 60
    },
    {
      "epoch": 1.11,
      "learning_rate": 0.001,
      "loss": 31.6281,
      "step": 70
    },
    {
      "epoch": 1.27,
      "learning_rate": 0.001,
      "loss": 31.6859,
      "step": 80
    },
    {
      "epoch": 1.43,
      "learning_rate": 0.001,
      "loss": 31.6109,
      "step": 90
    },
    {
      "epoch": 1.59,
      "learning_rate": 0.001,
      "loss": 31.7641,
      "step": 100
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.001,
      "loss": 31.7063,
      "step": 110
    },
    {
      "epoch": 1.9,
      "learning_rate": 0.001,
      "loss": 31.85,
      "step": 120
    },
    {
      "epoch": 2.06,
      "learning_rate": 0.001,
      "loss": 31.975,
      "step": 130
    },
    {
      "epoch": 2.22,
      "learning_rate": 0.001,
      "loss": 31.9344,
      "step": 140
    },
    {
      "epoch": 2.38,
      "learning_rate": 0.001,
      "loss": 32.0484,
      "step": 150
    },
    {
      "epoch": 2.54,
      "learning_rate": 0.001,
      "loss": 32.0344,
      "step": 160
    },
    {
      "epoch": 2.7,
      "learning_rate": 0.001,
      "loss": 32.0703,
      "step": 170
    },
    {
      "epoch": 2.86,
      "learning_rate": 0.001,
      "loss": 32.0656,
      "step": 180
    },
    {
      "epoch": 3.02,
      "learning_rate": 0.001,
      "loss": 32.0734,
      "step": 190
    },
    {
      "epoch": 3.17,
      "learning_rate": 0.001,
      "loss": 32.2547,
      "step": 200
    },
    {
      "epoch": 3.33,
      "learning_rate": 0.001,
      "loss": 32.1437,
      "step": 210
    },
    {
      "epoch": 3.49,
      "learning_rate": 0.001,
      "loss": 32.3672,
      "step": 220
    },
    {
      "epoch": 3.65,
      "learning_rate": 0.001,
      "loss": 32.2766,
      "step": 230
    },
    {
      "epoch": 3.81,
      "learning_rate": 0.001,
      "loss": 32.3906,
      "step": 240
    },
    {
      "epoch": 3.97,
      "learning_rate": 0.001,
      "loss": 32.6156,
      "step": 250
    },
    {
      "epoch": 4.13,
      "learning_rate": 0.001,
      "loss": 32.5547,
      "step": 260
    },
    {
      "epoch": 4.29,
      "learning_rate": 0.001,
      "loss": 32.8094,
      "step": 270
    },
    {
      "epoch": 4.44,
      "learning_rate": 0.001,
      "loss": 32.7406,
      "step": 280
    },
    {
      "epoch": 4.6,
      "learning_rate": 0.001,
      "loss": 32.9031,
      "step": 290
    },
    {
      "epoch": 4.76,
      "learning_rate": 0.001,
      "loss": 32.8188,
      "step": 300
    },
    {
      "epoch": 4.92,
      "learning_rate": 0.001,
      "loss": 32.9562,
      "step": 310
    },
    {
      "epoch": 5.08,
      "learning_rate": 0.001,
      "loss": 32.9469,
      "step": 320
    },
    {
      "epoch": 5.24,
      "learning_rate": 0.001,
      "loss": 33.0625,
      "step": 330
    },
    {
      "epoch": 5.4,
      "learning_rate": 0.001,
      "loss": 33.0594,
      "step": 340
    },
    {
      "epoch": 5.56,
      "learning_rate": 0.001,
      "loss": 33.1031,
      "step": 350
    },
    {
      "epoch": 5.71,
      "learning_rate": 0.001,
      "loss": 33.2094,
      "step": 360
    },
    {
      "epoch": 5.87,
      "learning_rate": 0.001,
      "loss": 33.2906,
      "step": 370
    },
    {
      "epoch": 6.03,
      "learning_rate": 0.001,
      "loss": 33.3406,
      "step": 380
    },
    {
      "epoch": 6.19,
      "learning_rate": 0.001,
      "loss": 33.2656,
      "step": 390
    },
    {
      "epoch": 6.35,
      "learning_rate": 0.001,
      "loss": 33.3719,
      "step": 400
    },
    {
      "epoch": 6.51,
      "learning_rate": 0.001,
      "loss": 33.4844,
      "step": 410
    },
    {
      "epoch": 6.67,
      "learning_rate": 0.001,
      "loss": 33.4625,
      "step": 420
    },
    {
      "epoch": 6.83,
      "learning_rate": 0.001,
      "loss": 33.4438,
      "step": 430
    },
    {
      "epoch": 6.98,
      "learning_rate": 0.001,
      "loss": 33.5844,
      "step": 440
    },
    {
      "epoch": 7.14,
      "learning_rate": 0.001,
      "loss": 33.6281,
      "step": 450
    },
    {
      "epoch": 7.3,
      "learning_rate": 0.001,
      "loss": 33.7062,
      "step": 460
    },
    {
      "epoch": 7.46,
      "learning_rate": 0.001,
      "loss": 33.6719,
      "step": 470
    },
    {
      "epoch": 7.62,
      "learning_rate": 0.001,
      "loss": 33.7156,
      "step": 480
    },
    {
      "epoch": 7.78,
      "learning_rate": 0.001,
      "loss": 33.75,
      "step": 490
    },
    {
      "epoch": 7.94,
      "learning_rate": 0.001,
      "loss": 33.8344,
      "step": 500
    },
    {
      "epoch": 8.1,
      "learning_rate": 0.001,
      "loss": 33.725,
      "step": 510
    },
    {
      "epoch": 8.25,
      "learning_rate": 0.001,
      "loss": 33.9688,
      "step": 520
    },
    {
      "epoch": 8.41,
      "learning_rate": 0.001,
      "loss": 33.9344,
      "step": 530
    },
    {
      "epoch": 8.57,
      "learning_rate": 0.001,
      "loss": 34.1,
      "step": 540
    },
    {
      "epoch": 8.73,
      "learning_rate": 0.001,
      "loss": 34.0719,
      "step": 550
    },
    {
      "epoch": 8.89,
      "learning_rate": 0.001,
      "loss": 34.2563,
      "step": 560
    },
    {
      "epoch": 9.05,
      "learning_rate": 0.001,
      "loss": 34.0438,
      "step": 570
    },
    {
      "epoch": 9.21,
      "learning_rate": 0.001,
      "loss": 34.3094,
      "step": 580
    },
    {
      "epoch": 9.37,
      "learning_rate": 0.001,
      "loss": 34.3687,
      "step": 590
    },
    {
      "epoch": 9.52,
      "learning_rate": 0.001,
      "loss": 34.4406,
      "step": 600
    },
    {
      "epoch": 9.68,
      "learning_rate": 0.001,
      "loss": 34.3531,
      "step": 610
    },
    {
      "epoch": 9.84,
      "learning_rate": 0.001,
      "loss": 34.3344,
      "step": 620
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.001,
      "loss": 34.4125,
      "step": 630
    },
    {
      "epoch": 10.16,
      "learning_rate": 0.001,
      "loss": 34.4,
      "step": 640
    },
    {
      "epoch": 10.32,
      "learning_rate": 0.001,
      "loss": 34.4906,
      "step": 650
    },
    {
      "epoch": 10.48,
      "learning_rate": 0.001,
      "loss": 34.5125,
      "step": 660
    },
    {
      "epoch": 10.63,
      "learning_rate": 0.001,
      "loss": 34.5875,
      "step": 670
    },
    {
      "epoch": 10.79,
      "learning_rate": 0.001,
      "loss": 34.7281,
      "step": 680
    },
    {
      "epoch": 10.95,
      "learning_rate": 0.001,
      "loss": 34.825,
      "step": 690
    },
    {
      "epoch": 11.11,
      "learning_rate": 0.001,
      "loss": 34.7437,
      "step": 700
    },
    {
      "epoch": 11.27,
      "learning_rate": 0.001,
      "loss": 34.6531,
      "step": 710
    },
    {
      "epoch": 11.43,
      "learning_rate": 0.001,
      "loss": 34.8531,
      "step": 720
    },
    {
      "epoch": 11.59,
      "learning_rate": 0.001,
      "loss": 34.8438,
      "step": 730
    },
    {
      "epoch": 11.75,
      "learning_rate": 0.001,
      "loss": 35.0,
      "step": 740
    },
    {
      "epoch": 11.9,
      "learning_rate": 0.001,
      "loss": 34.9562,
      "step": 750
    },
    {
      "epoch": 12.06,
      "learning_rate": 0.001,
      "loss": 34.9781,
      "step": 760
    },
    {
      "epoch": 12.22,
      "learning_rate": 0.001,
      "loss": 35.0625,
      "step": 770
    },
    {
      "epoch": 12.38,
      "learning_rate": 0.001,
      "loss": 35.1063,
      "step": 780
    },
    {
      "epoch": 12.54,
      "learning_rate": 0.001,
      "loss": 35.1594,
      "step": 790
    },
    {
      "epoch": 12.7,
      "learning_rate": 0.001,
      "loss": 35.2,
      "step": 800
    },
    {
      "epoch": 12.86,
      "learning_rate": 0.001,
      "loss": 35.1281,
      "step": 810
    },
    {
      "epoch": 13.02,
      "learning_rate": 0.001,
      "loss": 35.3563,
      "step": 820
    },
    {
      "epoch": 13.17,
      "learning_rate": 0.001,
      "loss": 35.1688,
      "step": 830
    },
    {
      "epoch": 13.33,
      "learning_rate": 0.001,
      "loss": 35.3625,
      "step": 840
    },
    {
      "epoch": 13.49,
      "learning_rate": 0.001,
      "loss": 35.5438,
      "step": 850
    },
    {
      "epoch": 13.65,
      "learning_rate": 0.001,
      "loss": 35.3937,
      "step": 860
    },
    {
      "epoch": 13.81,
      "learning_rate": 0.001,
      "loss": 35.3937,
      "step": 870
    },
    {
      "epoch": 13.97,
      "learning_rate": 0.001,
      "loss": 35.5938,
      "step": 880
    },
    {
      "epoch": 14.13,
      "learning_rate": 0.001,
      "loss": 35.3781,
      "step": 890
    },
    {
      "epoch": 14.29,
      "learning_rate": 0.001,
      "loss": 35.775,
      "step": 900
    },
    {
      "epoch": 14.44,
      "learning_rate": 0.001,
      "loss": 35.7594,
      "step": 910
    },
    {
      "epoch": 14.6,
      "learning_rate": 0.001,
      "loss": 35.5438,
      "step": 920
    },
    {
      "epoch": 14.76,
      "learning_rate": 0.001,
      "loss": 35.8406,
      "step": 930
    },
    {
      "epoch": 14.92,
      "learning_rate": 0.001,
      "loss": 35.6,
      "step": 940
    },
    {
      "epoch": 15.08,
      "learning_rate": 0.001,
      "loss": 35.8844,
      "step": 950
    },
    {
      "epoch": 15.24,
      "learning_rate": 0.001,
      "loss": 35.9594,
      "step": 960
    },
    {
      "epoch": 15.4,
      "learning_rate": 0.001,
      "loss": 35.8281,
      "step": 970
    },
    {
      "epoch": 15.56,
      "learning_rate": 0.001,
      "loss": 36.0125,
      "step": 980
    },
    {
      "epoch": 15.71,
      "learning_rate": 0.001,
      "loss": 36.1313,
      "step": 990
    },
    {
      "epoch": 15.87,
      "learning_rate": 0.001,
      "loss": 36.0656,
      "step": 1000
    },
    {
      "epoch": 16.03,
      "learning_rate": 0.001,
      "loss": 36.1938,
      "step": 1010
    },
    {
      "epoch": 16.19,
      "learning_rate": 0.001,
      "loss": 36.2031,
      "step": 1020
    },
    {
      "epoch": 16.35,
      "learning_rate": 0.001,
      "loss": 36.1688,
      "step": 1030
    },
    {
      "epoch": 16.51,
      "learning_rate": 0.001,
      "loss": 36.3156,
      "step": 1040
    },
    {
      "epoch": 16.67,
      "learning_rate": 0.001,
      "loss": 36.2719,
      "step": 1050
    },
    {
      "epoch": 16.83,
      "learning_rate": 0.001,
      "loss": 36.4656,
      "step": 1060
    },
    {
      "epoch": 16.98,
      "learning_rate": 0.001,
      "loss": 36.3687,
      "step": 1070
    },
    {
      "epoch": 17.14,
      "learning_rate": 0.001,
      "loss": 36.4031,
      "step": 1080
    },
    {
      "epoch": 17.3,
      "learning_rate": 0.001,
      "loss": 36.5469,
      "step": 1090
    },
    {
      "epoch": 17.46,
      "learning_rate": 0.001,
      "loss": 36.5094,
      "step": 1100
    },
    {
      "epoch": 17.62,
      "learning_rate": 0.001,
      "loss": 36.6156,
      "step": 1110
    },
    {
      "epoch": 17.78,
      "learning_rate": 0.001,
      "loss": 36.5531,
      "step": 1120
    },
    {
      "epoch": 17.94,
      "learning_rate": 0.001,
      "loss": 36.525,
      "step": 1130
    },
    {
      "epoch": 18.1,
      "learning_rate": 0.001,
      "loss": 36.6562,
      "step": 1140
    },
    {
      "epoch": 18.25,
      "learning_rate": 0.001,
      "loss": 36.5812,
      "step": 1150
    },
    {
      "epoch": 18.41,
      "learning_rate": 0.001,
      "loss": 36.6625,
      "step": 1160
    },
    {
      "epoch": 18.57,
      "learning_rate": 0.001,
      "loss": 36.625,
      "step": 1170
    },
    {
      "epoch": 18.73,
      "learning_rate": 0.001,
      "loss": 36.8719,
      "step": 1180
    },
    {
      "epoch": 18.89,
      "learning_rate": 0.001,
      "loss": 36.9219,
      "step": 1190
    },
    {
      "epoch": 19.05,
      "learning_rate": 0.001,
      "loss": 36.8875,
      "step": 1200
    },
    {
      "epoch": 19.21,
      "learning_rate": 0.001,
      "loss": 36.7344,
      "step": 1210
    },
    {
      "epoch": 19.37,
      "learning_rate": 0.001,
      "loss": 36.9562,
      "step": 1220
    },
    {
      "epoch": 19.52,
      "learning_rate": 0.001,
      "loss": 37.1688,
      "step": 1230
    },
    {
      "epoch": 19.68,
      "learning_rate": 0.001,
      "loss": 37.2156,
      "step": 1240
    },
    {
      "epoch": 19.84,
      "learning_rate": 0.001,
      "loss": 37.1719,
      "step": 1250
    },
    {
      "epoch": 20.0,
      "learning_rate": 0.001,
      "loss": 37.2594,
      "step": 1260
    },
    {
      "epoch": 20.16,
      "learning_rate": 0.001,
      "loss": 37.4062,
      "step": 1270
    },
    {
      "epoch": 20.32,
      "learning_rate": 0.001,
      "loss": 37.4937,
      "step": 1280
    },
    {
      "epoch": 20.48,
      "learning_rate": 0.001,
      "loss": 37.5625,
      "step": 1290
    },
    {
      "epoch": 20.63,
      "learning_rate": 0.001,
      "loss": 37.6,
      "step": 1300
    },
    {
      "epoch": 20.79,
      "learning_rate": 0.001,
      "loss": 37.6156,
      "step": 1310
    },
    {
      "epoch": 20.95,
      "learning_rate": 0.001,
      "loss": 37.625,
      "step": 1320
    },
    {
      "epoch": 21.11,
      "learning_rate": 0.001,
      "loss": 37.5906,
      "step": 1330
    },
    {
      "epoch": 21.27,
      "learning_rate": 0.001,
      "loss": 37.8344,
      "step": 1340
    },
    {
      "epoch": 21.43,
      "learning_rate": 0.001,
      "loss": 37.7656,
      "step": 1350
    },
    {
      "epoch": 21.59,
      "learning_rate": 0.001,
      "loss": 37.8875,
      "step": 1360
    },
    {
      "epoch": 21.75,
      "learning_rate": 0.001,
      "loss": 37.6437,
      "step": 1370
    },
    {
      "epoch": 21.9,
      "learning_rate": 0.001,
      "loss": 38.0063,
      "step": 1380
    },
    {
      "epoch": 22.06,
      "learning_rate": 0.001,
      "loss": 38.0594,
      "step": 1390
    },
    {
      "epoch": 22.22,
      "learning_rate": 0.001,
      "loss": 37.9656,
      "step": 1400
    },
    {
      "epoch": 22.38,
      "learning_rate": 0.001,
      "loss": 38.125,
      "step": 1410
    },
    {
      "epoch": 22.54,
      "learning_rate": 0.001,
      "loss": 38.1469,
      "step": 1420
    },
    {
      "epoch": 22.7,
      "learning_rate": 0.001,
      "loss": 38.2188,
      "step": 1430
    },
    {
      "epoch": 22.86,
      "learning_rate": 0.001,
      "loss": 38.0969,
      "step": 1440
    },
    {
      "epoch": 23.02,
      "learning_rate": 0.001,
      "loss": 38.3469,
      "step": 1450
    },
    {
      "epoch": 23.17,
      "learning_rate": 0.001,
      "loss": 38.3563,
      "step": 1460
    },
    {
      "epoch": 23.33,
      "learning_rate": 0.001,
      "loss": 38.3719,
      "step": 1470
    },
    {
      "epoch": 23.49,
      "learning_rate": 0.001,
      "loss": 38.3375,
      "step": 1480
    },
    {
      "epoch": 23.65,
      "learning_rate": 0.001,
      "loss": 38.5375,
      "step": 1490
    },
    {
      "epoch": 23.81,
      "learning_rate": 0.001,
      "loss": 38.4531,
      "step": 1500
    },
    {
      "epoch": 23.97,
      "learning_rate": 0.001,
      "loss": 38.4469,
      "step": 1510
    },
    {
      "epoch": 24.13,
      "learning_rate": 0.001,
      "loss": 38.4375,
      "step": 1520
    },
    {
      "epoch": 24.29,
      "learning_rate": 0.001,
      "loss": 38.6406,
      "step": 1530
    },
    {
      "epoch": 24.44,
      "learning_rate": 0.001,
      "loss": 38.7156,
      "step": 1540
    },
    {
      "epoch": 24.6,
      "learning_rate": 0.001,
      "loss": 38.7719,
      "step": 1550
    },
    {
      "epoch": 24.76,
      "learning_rate": 0.001,
      "loss": 38.7313,
      "step": 1560
    },
    {
      "epoch": 24.92,
      "learning_rate": 0.001,
      "loss": 38.9125,
      "step": 1570
    },
    {
      "epoch": 25.08,
      "learning_rate": 0.001,
      "loss": 38.8281,
      "step": 1580
    },
    {
      "epoch": 25.24,
      "learning_rate": 0.001,
      "loss": 38.9062,
      "step": 1590
    },
    {
      "epoch": 25.4,
      "learning_rate": 0.001,
      "loss": 39.025,
      "step": 1600
    },
    {
      "epoch": 25.56,
      "learning_rate": 0.001,
      "loss": 38.9125,
      "step": 1610
    },
    {
      "epoch": 25.71,
      "learning_rate": 0.001,
      "loss": 39.0594,
      "step": 1620
    },
    {
      "epoch": 25.87,
      "learning_rate": 0.001,
      "loss": 38.9625,
      "step": 1630
    },
    {
      "epoch": 26.03,
      "learning_rate": 0.001,
      "loss": 39.1187,
      "step": 1640
    },
    {
      "epoch": 26.19,
      "learning_rate": 0.001,
      "loss": 39.1,
      "step": 1650
    },
    {
      "epoch": 26.35,
      "learning_rate": 0.001,
      "loss": 39.1313,
      "step": 1660
    },
    {
      "epoch": 26.51,
      "learning_rate": 0.001,
      "loss": 39.2906,
      "step": 1670
    },
    {
      "epoch": 26.67,
      "learning_rate": 0.001,
      "loss": 39.1719,
      "step": 1680
    },
    {
      "epoch": 26.83,
      "learning_rate": 0.001,
      "loss": 39.2687,
      "step": 1690
    },
    {
      "epoch": 26.98,
      "learning_rate": 0.001,
      "loss": 39.4312,
      "step": 1700
    },
    {
      "epoch": 27.14,
      "learning_rate": 0.001,
      "loss": 39.2563,
      "step": 1710
    },
    {
      "epoch": 27.3,
      "learning_rate": 0.001,
      "loss": 39.3344,
      "step": 1720
    },
    {
      "epoch": 27.46,
      "learning_rate": 0.001,
      "loss": 39.375,
      "step": 1730
    },
    {
      "epoch": 27.62,
      "learning_rate": 0.001,
      "loss": 39.4031,
      "step": 1740
    },
    {
      "epoch": 27.78,
      "learning_rate": 0.001,
      "loss": 39.4969,
      "step": 1750
    },
    {
      "epoch": 27.94,
      "learning_rate": 0.001,
      "loss": 39.5219,
      "step": 1760
    },
    {
      "epoch": 28.1,
      "learning_rate": 0.001,
      "loss": 39.5719,
      "step": 1770
    },
    {
      "epoch": 28.25,
      "learning_rate": 0.001,
      "loss": 39.6719,
      "step": 1780
    },
    {
      "epoch": 28.41,
      "learning_rate": 0.001,
      "loss": 39.7156,
      "step": 1790
    },
    {
      "epoch": 28.57,
      "learning_rate": 0.001,
      "loss": 39.8312,
      "step": 1800
    },
    {
      "epoch": 28.73,
      "learning_rate": 0.001,
      "loss": 39.8687,
      "step": 1810
    },
    {
      "epoch": 28.89,
      "learning_rate": 0.001,
      "loss": 39.9906,
      "step": 1820
    },
    {
      "epoch": 29.05,
      "learning_rate": 0.001,
      "loss": 40.0156,
      "step": 1830
    },
    {
      "epoch": 29.21,
      "learning_rate": 0.001,
      "loss": 40.1469,
      "step": 1840
    },
    {
      "epoch": 29.37,
      "learning_rate": 0.001,
      "loss": 40.1938,
      "step": 1850
    },
    {
      "epoch": 29.52,
      "learning_rate": 0.001,
      "loss": 40.1594,
      "step": 1860
    },
    {
      "epoch": 29.68,
      "learning_rate": 0.001,
      "loss": 39.9094,
      "step": 1870
    },
    {
      "epoch": 29.84,
      "learning_rate": 0.001,
      "loss": 40.1625,
      "step": 1880
    },
    {
      "epoch": 30.0,
      "learning_rate": 0.001,
      "loss": 40.1906,
      "step": 1890
    },
    {
      "epoch": 30.16,
      "learning_rate": 0.001,
      "loss": 40.3406,
      "step": 1900
    },
    {
      "epoch": 30.32,
      "learning_rate": 0.001,
      "loss": 40.2812,
      "step": 1910
    },
    {
      "epoch": 30.48,
      "learning_rate": 0.001,
      "loss": 40.2969,
      "step": 1920
    },
    {
      "epoch": 30.63,
      "learning_rate": 0.001,
      "loss": 40.2469,
      "step": 1930
    },
    {
      "epoch": 30.79,
      "learning_rate": 0.001,
      "loss": 40.3656,
      "step": 1940
    },
    {
      "epoch": 30.95,
      "learning_rate": 0.001,
      "loss": 40.4156,
      "step": 1950
    },
    {
      "epoch": 31.11,
      "learning_rate": 0.001,
      "loss": 40.4844,
      "step": 1960
    },
    {
      "epoch": 31.27,
      "learning_rate": 0.001,
      "loss": 40.4531,
      "step": 1970
    },
    {
      "epoch": 31.43,
      "learning_rate": 0.001,
      "loss": 40.6187,
      "step": 1980
    },
    {
      "epoch": 31.59,
      "learning_rate": 0.001,
      "loss": 40.8844,
      "step": 1990
    },
    {
      "epoch": 31.75,
      "learning_rate": 0.001,
      "loss": 40.8125,
      "step": 2000
    },
    {
      "epoch": 31.9,
      "learning_rate": 0.001,
      "loss": 40.9562,
      "step": 2010
    },
    {
      "epoch": 32.06,
      "learning_rate": 0.001,
      "loss": 40.8312,
      "step": 2020
    },
    {
      "epoch": 32.22,
      "learning_rate": 0.001,
      "loss": 40.825,
      "step": 2030
    },
    {
      "epoch": 32.38,
      "learning_rate": 0.001,
      "loss": 41.0594,
      "step": 2040
    },
    {
      "epoch": 32.54,
      "learning_rate": 0.001,
      "loss": 40.8875,
      "step": 2050
    },
    {
      "epoch": 32.7,
      "learning_rate": 0.001,
      "loss": 40.8062,
      "step": 2060
    },
    {
      "epoch": 32.86,
      "learning_rate": 0.001,
      "loss": 41.0844,
      "step": 2070
    },
    {
      "epoch": 33.02,
      "learning_rate": 0.001,
      "loss": 41.1187,
      "step": 2080
    },
    {
      "epoch": 33.17,
      "learning_rate": 0.001,
      "loss": 41.1031,
      "step": 2090
    },
    {
      "epoch": 33.33,
      "learning_rate": 0.001,
      "loss": 41.2344,
      "step": 2100
    },
    {
      "epoch": 33.49,
      "learning_rate": 0.001,
      "loss": 41.175,
      "step": 2110
    },
    {
      "epoch": 33.65,
      "learning_rate": 0.001,
      "loss": 41.4813,
      "step": 2120
    },
    {
      "epoch": 33.81,
      "learning_rate": 0.001,
      "loss": 41.4031,
      "step": 2130
    },
    {
      "epoch": 33.97,
      "learning_rate": 0.001,
      "loss": 41.2875,
      "step": 2140
    },
    {
      "epoch": 34.13,
      "learning_rate": 0.001,
      "loss": 41.6031,
      "step": 2150
    },
    {
      "epoch": 34.29,
      "learning_rate": 0.001,
      "loss": 41.3781,
      "step": 2160
    },
    {
      "epoch": 34.44,
      "learning_rate": 0.001,
      "loss": 41.7094,
      "step": 2170
    },
    {
      "epoch": 34.6,
      "learning_rate": 0.001,
      "loss": 41.6,
      "step": 2180
    },
    {
      "epoch": 34.76,
      "learning_rate": 0.001,
      "loss": 41.6313,
      "step": 2190
    },
    {
      "epoch": 34.92,
      "learning_rate": 0.001,
      "loss": 41.7125,
      "step": 2200
    },
    {
      "epoch": 35.08,
      "learning_rate": 0.001,
      "loss": 41.7625,
      "step": 2210
    },
    {
      "epoch": 35.24,
      "learning_rate": 0.001,
      "loss": 41.8656,
      "step": 2220
    },
    {
      "epoch": 35.4,
      "learning_rate": 0.001,
      "loss": 41.8531,
      "step": 2230
    },
    {
      "epoch": 35.56,
      "learning_rate": 0.001,
      "loss": 41.9312,
      "step": 2240
    },
    {
      "epoch": 35.71,
      "learning_rate": 0.001,
      "loss": 41.9406,
      "step": 2250
    },
    {
      "epoch": 35.87,
      "learning_rate": 0.001,
      "loss": 42.0187,
      "step": 2260
    },
    {
      "epoch": 36.03,
      "learning_rate": 0.001,
      "loss": 42.2437,
      "step": 2270
    },
    {
      "epoch": 36.19,
      "learning_rate": 0.001,
      "loss": 42.15,
      "step": 2280
    },
    {
      "epoch": 36.35,
      "learning_rate": 0.001,
      "loss": 42.2094,
      "step": 2290
    },
    {
      "epoch": 36.51,
      "learning_rate": 0.001,
      "loss": 42.2344,
      "step": 2300
    },
    {
      "epoch": 36.67,
      "learning_rate": 0.001,
      "loss": 42.4469,
      "step": 2310
    },
    {
      "epoch": 36.83,
      "learning_rate": 0.001,
      "loss": 42.3031,
      "step": 2320
    },
    {
      "epoch": 36.98,
      "learning_rate": 0.001,
      "loss": 42.3625,
      "step": 2330
    },
    {
      "epoch": 37.14,
      "learning_rate": 0.001,
      "loss": 42.7094,
      "step": 2340
    },
    {
      "epoch": 37.3,
      "learning_rate": 0.001,
      "loss": 42.6344,
      "step": 2350
    },
    {
      "epoch": 37.46,
      "learning_rate": 0.001,
      "loss": 42.6531,
      "step": 2360
    },
    {
      "epoch": 37.62,
      "learning_rate": 0.001,
      "loss": 42.6063,
      "step": 2370
    },
    {
      "epoch": 37.78,
      "learning_rate": 0.001,
      "loss": 42.6313,
      "step": 2380
    },
    {
      "epoch": 37.94,
      "learning_rate": 0.001,
      "loss": 42.7375,
      "step": 2390
    },
    {
      "epoch": 38.1,
      "learning_rate": 0.001,
      "loss": 42.7563,
      "step": 2400
    },
    {
      "epoch": 38.25,
      "learning_rate": 0.001,
      "loss": 42.7812,
      "step": 2410
    },
    {
      "epoch": 38.41,
      "learning_rate": 0.001,
      "loss": 42.7969,
      "step": 2420
    },
    {
      "epoch": 38.57,
      "learning_rate": 0.001,
      "loss": 42.8375,
      "step": 2430
    },
    {
      "epoch": 38.73,
      "learning_rate": 0.001,
      "loss": 43.0281,
      "step": 2440
    },
    {
      "epoch": 38.89,
      "learning_rate": 0.001,
      "loss": 42.9031,
      "step": 2450
    },
    {
      "epoch": 39.05,
      "learning_rate": 0.001,
      "loss": 42.9969,
      "step": 2460
    },
    {
      "epoch": 39.21,
      "learning_rate": 0.001,
      "loss": 43.075,
      "step": 2470
    },
    {
      "epoch": 39.37,
      "learning_rate": 0.001,
      "loss": 43.075,
      "step": 2480
    },
    {
      "epoch": 39.52,
      "learning_rate": 0.001,
      "loss": 43.0812,
      "step": 2490
    },
    {
      "epoch": 39.68,
      "learning_rate": 0.001,
      "loss": 43.2406,
      "step": 2500
    },
    {
      "epoch": 39.84,
      "learning_rate": 0.001,
      "loss": 43.1375,
      "step": 2510
    },
    {
      "epoch": 40.0,
      "learning_rate": 0.001,
      "loss": 43.1625,
      "step": 2520
    },
    {
      "epoch": 40.16,
      "learning_rate": 0.001,
      "loss": 43.2313,
      "step": 2530
    },
    {
      "epoch": 40.32,
      "learning_rate": 0.001,
      "loss": 43.4375,
      "step": 2540
    },
    {
      "epoch": 40.48,
      "learning_rate": 0.001,
      "loss": 43.4719,
      "step": 2550
    },
    {
      "epoch": 40.63,
      "learning_rate": 0.001,
      "loss": 43.3969,
      "step": 2560
    },
    {
      "epoch": 40.79,
      "learning_rate": 0.001,
      "loss": 43.675,
      "step": 2570
    },
    {
      "epoch": 40.95,
      "learning_rate": 0.001,
      "loss": 43.5562,
      "step": 2580
    },
    {
      "epoch": 41.11,
      "learning_rate": 0.001,
      "loss": 43.6844,
      "step": 2590
    },
    {
      "epoch": 41.27,
      "learning_rate": 0.001,
      "loss": 43.8094,
      "step": 2600
    },
    {
      "epoch": 41.43,
      "learning_rate": 0.001,
      "loss": 43.8656,
      "step": 2610
    },
    {
      "epoch": 41.59,
      "learning_rate": 0.001,
      "loss": 43.9969,
      "step": 2620
    },
    {
      "epoch": 41.75,
      "learning_rate": 0.001,
      "loss": 43.8469,
      "step": 2630
    },
    {
      "epoch": 41.9,
      "learning_rate": 0.001,
      "loss": 44.0688,
      "step": 2640
    },
    {
      "epoch": 42.06,
      "learning_rate": 0.001,
      "loss": 43.9531,
      "step": 2650
    },
    {
      "epoch": 42.22,
      "learning_rate": 0.001,
      "loss": 43.9937,
      "step": 2660
    },
    {
      "epoch": 42.38,
      "learning_rate": 0.001,
      "loss": 44.1125,
      "step": 2670
    },
    {
      "epoch": 42.54,
      "learning_rate": 0.001,
      "loss": 44.1969,
      "step": 2680
    },
    {
      "epoch": 42.7,
      "learning_rate": 0.001,
      "loss": 44.1781,
      "step": 2690
    },
    {
      "epoch": 42.86,
      "learning_rate": 0.001,
      "loss": 44.3781,
      "step": 2700
    },
    {
      "epoch": 43.02,
      "learning_rate": 0.001,
      "loss": 44.3125,
      "step": 2710
    },
    {
      "epoch": 43.17,
      "learning_rate": 0.001,
      "loss": 44.2875,
      "step": 2720
    },
    {
      "epoch": 43.33,
      "learning_rate": 0.001,
      "loss": 44.3344,
      "step": 2730
    },
    {
      "epoch": 43.49,
      "learning_rate": 0.001,
      "loss": 44.3188,
      "step": 2740
    },
    {
      "epoch": 43.65,
      "learning_rate": 0.001,
      "loss": 44.3281,
      "step": 2750
    },
    {
      "epoch": 43.81,
      "learning_rate": 0.001,
      "loss": 44.5063,
      "step": 2760
    },
    {
      "epoch": 43.97,
      "learning_rate": 0.001,
      "loss": 44.5625,
      "step": 2770
    },
    {
      "epoch": 44.13,
      "learning_rate": 0.001,
      "loss": 44.2313,
      "step": 2780
    },
    {
      "epoch": 44.29,
      "learning_rate": 0.001,
      "loss": 44.7313,
      "step": 2790
    },
    {
      "epoch": 44.44,
      "learning_rate": 0.001,
      "loss": 44.6125,
      "step": 2800
    },
    {
      "epoch": 44.6,
      "learning_rate": 0.001,
      "loss": 44.7125,
      "step": 2810
    },
    {
      "epoch": 44.76,
      "learning_rate": 0.001,
      "loss": 44.9594,
      "step": 2820
    },
    {
      "epoch": 44.92,
      "learning_rate": 0.001,
      "loss": 44.975,
      "step": 2830
    },
    {
      "epoch": 45.08,
      "learning_rate": 0.001,
      "loss": 44.9531,
      "step": 2840
    },
    {
      "epoch": 45.24,
      "learning_rate": 0.001,
      "loss": 44.9406,
      "step": 2850
    },
    {
      "epoch": 45.4,
      "learning_rate": 0.001,
      "loss": 45.0812,
      "step": 2860
    },
    {
      "epoch": 45.56,
      "learning_rate": 0.001,
      "loss": 45.1969,
      "step": 2870
    },
    {
      "epoch": 45.71,
      "learning_rate": 0.001,
      "loss": 44.9125,
      "step": 2880
    },
    {
      "epoch": 45.87,
      "learning_rate": 0.001,
      "loss": 45.0312,
      "step": 2890
    },
    {
      "epoch": 46.03,
      "learning_rate": 0.001,
      "loss": 45.1406,
      "step": 2900
    },
    {
      "epoch": 46.19,
      "learning_rate": 0.001,
      "loss": 45.3125,
      "step": 2910
    },
    {
      "epoch": 46.35,
      "learning_rate": 0.001,
      "loss": 45.4719,
      "step": 2920
    },
    {
      "epoch": 46.51,
      "learning_rate": 0.001,
      "loss": 45.3312,
      "step": 2930
    },
    {
      "epoch": 46.67,
      "learning_rate": 0.001,
      "loss": 45.5156,
      "step": 2940
    },
    {
      "epoch": 46.83,
      "learning_rate": 0.001,
      "loss": 45.5406,
      "step": 2950
    },
    {
      "epoch": 46.98,
      "learning_rate": 0.001,
      "loss": 45.6063,
      "step": 2960
    },
    {
      "epoch": 47.14,
      "learning_rate": 0.001,
      "loss": 45.5781,
      "step": 2970
    },
    {
      "epoch": 47.3,
      "learning_rate": 0.001,
      "loss": 45.8438,
      "step": 2980
    },
    {
      "epoch": 47.46,
      "learning_rate": 0.001,
      "loss": 45.8813,
      "step": 2990
    },
    {
      "epoch": 47.62,
      "learning_rate": 0.001,
      "loss": 46.0375,
      "step": 3000
    },
    {
      "epoch": 47.78,
      "learning_rate": 0.001,
      "loss": 45.7313,
      "step": 3010
    },
    {
      "epoch": 47.94,
      "learning_rate": 0.001,
      "loss": 45.7844,
      "step": 3020
    },
    {
      "epoch": 48.1,
      "learning_rate": 0.001,
      "loss": 46.0875,
      "step": 3030
    },
    {
      "epoch": 48.25,
      "learning_rate": 0.001,
      "loss": 45.8438,
      "step": 3040
    },
    {
      "epoch": 48.41,
      "learning_rate": 0.001,
      "loss": 45.9375,
      "step": 3050
    },
    {
      "epoch": 48.57,
      "learning_rate": 0.001,
      "loss": 46.0656,
      "step": 3060
    },
    {
      "epoch": 48.73,
      "learning_rate": 0.001,
      "loss": 46.0469,
      "step": 3070
    },
    {
      "epoch": 48.89,
      "learning_rate": 0.001,
      "loss": 46.325,
      "step": 3080
    },
    {
      "epoch": 49.05,
      "learning_rate": 0.001,
      "loss": 46.2125,
      "step": 3090
    },
    {
      "epoch": 49.21,
      "learning_rate": 0.001,
      "loss": 46.3438,
      "step": 3100
    },
    {
      "epoch": 49.37,
      "learning_rate": 0.001,
      "loss": 46.4562,
      "step": 3110
    },
    {
      "epoch": 49.52,
      "learning_rate": 0.001,
      "loss": 46.3875,
      "step": 3120
    },
    {
      "epoch": 49.68,
      "learning_rate": 0.001,
      "loss": 46.1625,
      "step": 3130
    },
    {
      "epoch": 49.84,
      "learning_rate": 0.001,
      "loss": 46.7,
      "step": 3140
    },
    {
      "epoch": 50.0,
      "learning_rate": 0.001,
      "loss": 46.4406,
      "step": 3150
    },
    {
      "epoch": 50.16,
      "learning_rate": 0.001,
      "loss": 46.5906,
      "step": 3160
    },
    {
      "epoch": 50.32,
      "learning_rate": 0.001,
      "loss": 46.6156,
      "step": 3170
    },
    {
      "epoch": 50.48,
      "learning_rate": 0.001,
      "loss": 46.6187,
      "step": 3180
    },
    {
      "epoch": 50.63,
      "learning_rate": 0.001,
      "loss": 46.8219,
      "step": 3190
    },
    {
      "epoch": 50.79,
      "learning_rate": 0.001,
      "loss": 46.7281,
      "step": 3200
    },
    {
      "epoch": 50.95,
      "learning_rate": 0.001,
      "loss": 46.6469,
      "step": 3210
    },
    {
      "epoch": 51.11,
      "learning_rate": 0.001,
      "loss": 46.7031,
      "step": 3220
    },
    {
      "epoch": 51.27,
      "learning_rate": 0.001,
      "loss": 46.8312,
      "step": 3230
    },
    {
      "epoch": 51.43,
      "learning_rate": 0.001,
      "loss": 47.0844,
      "step": 3240
    },
    {
      "epoch": 51.59,
      "learning_rate": 0.001,
      "loss": 46.9156,
      "step": 3250
    },
    {
      "epoch": 51.75,
      "learning_rate": 0.001,
      "loss": 46.9813,
      "step": 3260
    },
    {
      "epoch": 51.9,
      "learning_rate": 0.001,
      "loss": 47.3062,
      "step": 3270
    },
    {
      "epoch": 52.06,
      "learning_rate": 0.001,
      "loss": 47.2313,
      "step": 3280
    },
    {
      "epoch": 52.22,
      "learning_rate": 0.001,
      "loss": 47.2656,
      "step": 3290
    },
    {
      "epoch": 52.38,
      "learning_rate": 0.001,
      "loss": 47.2031,
      "step": 3300
    },
    {
      "epoch": 52.54,
      "learning_rate": 0.001,
      "loss": 47.2812,
      "step": 3310
    },
    {
      "epoch": 52.7,
      "learning_rate": 0.001,
      "loss": 47.2281,
      "step": 3320
    },
    {
      "epoch": 52.86,
      "learning_rate": 0.001,
      "loss": 47.2906,
      "step": 3330
    },
    {
      "epoch": 53.02,
      "learning_rate": 0.001,
      "loss": 47.4469,
      "step": 3340
    },
    {
      "epoch": 53.17,
      "learning_rate": 0.001,
      "loss": 47.3719,
      "step": 3350
    },
    {
      "epoch": 53.33,
      "learning_rate": 0.001,
      "loss": 47.4813,
      "step": 3360
    },
    {
      "epoch": 53.49,
      "learning_rate": 0.001,
      "loss": 47.5906,
      "step": 3370
    },
    {
      "epoch": 53.65,
      "learning_rate": 0.001,
      "loss": 47.5719,
      "step": 3380
    },
    {
      "epoch": 53.81,
      "learning_rate": 0.001,
      "loss": 47.5938,
      "step": 3390
    },
    {
      "epoch": 53.97,
      "learning_rate": 0.001,
      "loss": 47.5906,
      "step": 3400
    },
    {
      "epoch": 54.13,
      "learning_rate": 0.001,
      "loss": 47.8594,
      "step": 3410
    },
    {
      "epoch": 54.29,
      "learning_rate": 0.001,
      "loss": 47.7969,
      "step": 3420
    },
    {
      "epoch": 54.44,
      "learning_rate": 0.001,
      "loss": 47.7844,
      "step": 3430
}, | |
{ | |
"epoch": 54.6, | |
"learning_rate": 0.001, | |
"loss": 47.7656, | |
"step": 3440 | |
}, | |
{ | |
"epoch": 54.76, | |
"learning_rate": 0.001, | |
"loss": 48.0594, | |
"step": 3450 | |
}, | |
{ | |
"epoch": 54.92, | |
"learning_rate": 0.001, | |
"loss": 47.8937, | |
"step": 3460 | |
}, | |
{ | |
"epoch": 55.08, | |
"learning_rate": 0.001, | |
"loss": 48.0063, | |
"step": 3470 | |
}, | |
{ | |
"epoch": 55.24, | |
"learning_rate": 0.001, | |
"loss": 47.8875, | |
"step": 3480 | |
}, | |
{ | |
"epoch": 55.4, | |
"learning_rate": 0.001, | |
"loss": 47.9594, | |
"step": 3490 | |
}, | |
{ | |
"epoch": 55.56, | |
"learning_rate": 0.001, | |
"loss": 48.0375, | |
"step": 3500 | |
}, | |
{ | |
"epoch": 55.71, | |
"learning_rate": 0.001, | |
"loss": 47.9625, | |
"step": 3510 | |
}, | |
{ | |
"epoch": 55.87, | |
"learning_rate": 0.001, | |
"loss": 48.2219, | |
"step": 3520 | |
}, | |
{ | |
"epoch": 56.03, | |
"learning_rate": 0.001, | |
"loss": 48.2938, | |
"step": 3530 | |
}, | |
{ | |
"epoch": 56.19, | |
"learning_rate": 0.001, | |
"loss": 48.3875, | |
"step": 3540 | |
}, | |
{ | |
"epoch": 56.35, | |
"learning_rate": 0.001, | |
"loss": 48.3156, | |
"step": 3550 | |
}, | |
{ | |
"epoch": 56.51, | |
"learning_rate": 0.001, | |
"loss": 48.4281, | |
"step": 3560 | |
}, | |
{ | |
"epoch": 56.67, | |
"learning_rate": 0.001, | |
"loss": 48.3813, | |
"step": 3570 | |
}, | |
{ | |
"epoch": 56.83, | |
"learning_rate": 0.001, | |
"loss": 48.6594, | |
"step": 3580 | |
}, | |
{ | |
"epoch": 56.98, | |
"learning_rate": 0.001, | |
"loss": 48.5344, | |
"step": 3590 | |
}, | |
{ | |
"epoch": 57.14, | |
"learning_rate": 0.001, | |
"loss": 48.4781, | |
"step": 3600 | |
}, | |
{ | |
"epoch": 57.3, | |
"learning_rate": 0.001, | |
"loss": 48.6469, | |
"step": 3610 | |
}, | |
{ | |
"epoch": 57.46, | |
"learning_rate": 0.001, | |
"loss": 48.7406, | |
"step": 3620 | |
}, | |
{ | |
"epoch": 57.62, | |
"learning_rate": 0.001, | |
"loss": 48.8312, | |
"step": 3630 | |
}, | |
{ | |
"epoch": 57.78, | |
"learning_rate": 0.001, | |
"loss": 48.7844, | |
"step": 3640 | |
}, | |
{ | |
"epoch": 57.94, | |
"learning_rate": 0.001, | |
"loss": 48.9906, | |
"step": 3650 | |
}, | |
{ | |
"epoch": 58.1, | |
"learning_rate": 0.001, | |
"loss": 48.7906, | |
"step": 3660 | |
}, | |
{ | |
"epoch": 58.25, | |
"learning_rate": 0.001, | |
"loss": 49.2281, | |
"step": 3670 | |
}, | |
{ | |
"epoch": 58.41, | |
"learning_rate": 0.001, | |
"loss": 49.2406, | |
"step": 3680 | |
}, | |
{ | |
"epoch": 58.57, | |
"learning_rate": 0.001, | |
"loss": 48.9844, | |
"step": 3690 | |
}, | |
{ | |
"epoch": 58.73, | |
"learning_rate": 0.001, | |
"loss": 49.0469, | |
"step": 3700 | |
}, | |
{ | |
"epoch": 58.89, | |
"learning_rate": 0.001, | |
"loss": 49.3438, | |
"step": 3710 | |
}, | |
{ | |
"epoch": 59.05, | |
"learning_rate": 0.001, | |
"loss": 49.2219, | |
"step": 3720 | |
}, | |
{ | |
"epoch": 59.21, | |
"learning_rate": 0.001, | |
"loss": 49.3219, | |
"step": 3730 | |
}, | |
{ | |
"epoch": 59.37, | |
"learning_rate": 0.001, | |
"loss": 49.4906, | |
"step": 3740 | |
}, | |
{ | |
"epoch": 59.52, | |
"learning_rate": 0.001, | |
"loss": 49.2281, | |
"step": 3750 | |
}, | |
{ | |
"epoch": 59.68, | |
"learning_rate": 0.001, | |
"loss": 49.4375, | |
"step": 3760 | |
}, | |
{ | |
"epoch": 59.84, | |
"learning_rate": 0.001, | |
"loss": 49.4844, | |
"step": 3770 | |
}, | |
{ | |
"epoch": 60.0, | |
"learning_rate": 0.001, | |
"loss": 49.4969, | |
"step": 3780 | |
}, | |
{ | |
"epoch": 60.16, | |
"learning_rate": 0.001, | |
"loss": 49.475, | |
"step": 3790 | |
}, | |
{ | |
"epoch": 60.32, | |
"learning_rate": 0.001, | |
"loss": 49.7406, | |
"step": 3800 | |
}, | |
{ | |
"epoch": 60.48, | |
"learning_rate": 0.001, | |
"loss": 49.7406, | |
"step": 3810 | |
}, | |
{ | |
"epoch": 60.63, | |
"learning_rate": 0.001, | |
"loss": 49.7687, | |
"step": 3820 | |
}, | |
{ | |
"epoch": 60.79, | |
"learning_rate": 0.001, | |
"loss": 49.6281, | |
"step": 3830 | |
}, | |
{ | |
"epoch": 60.95, | |
"learning_rate": 0.001, | |
"loss": 49.8312, | |
"step": 3840 | |
}, | |
{ | |
"epoch": 61.11, | |
"learning_rate": 0.001, | |
"loss": 50.1094, | |
"step": 3850 | |
}, | |
{ | |
"epoch": 61.27, | |
"learning_rate": 0.001, | |
"loss": 49.9156, | |
"step": 3860 | |
}, | |
{ | |
"epoch": 61.43, | |
"learning_rate": 0.001, | |
"loss": 49.7563, | |
"step": 3870 | |
}, | |
{ | |
"epoch": 61.59, | |
"learning_rate": 0.001, | |
"loss": 49.8719, | |
"step": 3880 | |
}, | |
{ | |
"epoch": 61.75, | |
"learning_rate": 0.001, | |
"loss": 50.1219, | |
"step": 3890 | |
}, | |
{ | |
"epoch": 61.9, | |
"learning_rate": 0.001, | |
"loss": 50.2469, | |
"step": 3900 | |
}, | |
{ | |
"epoch": 62.06, | |
"learning_rate": 0.001, | |
"loss": 50.0844, | |
"step": 3910 | |
}, | |
{ | |
"epoch": 62.22, | |
"learning_rate": 0.001, | |
"loss": 50.2719, | |
"step": 3920 | |
}, | |
{ | |
"epoch": 62.38, | |
"learning_rate": 0.001, | |
"loss": 50.0219, | |
"step": 3930 | |
}, | |
{ | |
"epoch": 62.54, | |
"learning_rate": 0.001, | |
"loss": 50.3875, | |
"step": 3940 | |
}, | |
{ | |
"epoch": 62.7, | |
"learning_rate": 0.001, | |
"loss": 50.3594, | |
"step": 3950 | |
}, | |
{ | |
"epoch": 62.86, | |
"learning_rate": 0.001, | |
"loss": 50.4531, | |
"step": 3960 | |
}, | |
{ | |
"epoch": 63.02, | |
"learning_rate": 0.001, | |
"loss": 50.6, | |
"step": 3970 | |
}, | |
{ | |
"epoch": 63.17, | |
"learning_rate": 0.001, | |
"loss": 50.5063, | |
"step": 3980 | |
}, | |
{ | |
"epoch": 63.33, | |
"learning_rate": 0.001, | |
"loss": 50.6125, | |
"step": 3990 | |
}, | |
{ | |
"epoch": 63.49, | |
"learning_rate": 0.001, | |
"loss": 50.6313, | |
"step": 4000 | |
}, | |
{ | |
"epoch": 63.49, | |
"eval_loss": 41.125, | |
"eval_runtime": 97.8561, | |
"eval_samples_per_second": 5.11, | |
"eval_steps_per_second": 0.644, | |
"step": 4000 | |
} | |
], | |
"max_steps": 20000, | |
"num_train_epochs": 318, | |
"total_flos": 4.897803240892662e+18, | |
"trial_name": null, | |
"trial_params": null | |
} | |