{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 74.17746759720838,
  "eval_steps": 500,
  "global_step": 2325,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16,
      "learning_rate": 0.0001,
      "loss": 2.4292,
      "step": 5
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0001,
      "loss": 1.6311,
      "step": 10
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.0001,
      "loss": 1.4364,
      "step": 15
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.0001,
      "loss": 1.3208,
      "step": 20
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0001,
      "loss": 1.2781,
      "step": 25
    },
    {
      "epoch": 0.96,
      "learning_rate": 0.0001,
      "loss": 1.2784,
      "step": 30
    },
    {
      "epoch": 1.12,
      "learning_rate": 0.0001,
      "loss": 1.1591,
      "step": 35
    },
    {
      "epoch": 1.28,
      "learning_rate": 0.0001,
      "loss": 1.1193,
      "step": 40
    },
    {
      "epoch": 1.44,
      "learning_rate": 0.0001,
      "loss": 1.1173,
      "step": 45
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.0001,
      "loss": 1.0948,
      "step": 50
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.0001,
      "loss": 1.0937,
      "step": 55
    },
    {
      "epoch": 1.91,
      "learning_rate": 0.0001,
      "loss": 1.0643,
      "step": 60
    },
    {
      "epoch": 2.07,
      "learning_rate": 0.0001,
      "loss": 1.0252,
      "step": 65
    },
    {
      "epoch": 2.23,
      "learning_rate": 0.0001,
      "loss": 0.9701,
      "step": 70
    },
    {
      "epoch": 2.39,
      "learning_rate": 0.0001,
      "loss": 0.9773,
      "step": 75
    },
    {
      "epoch": 2.55,
      "learning_rate": 0.0001,
      "loss": 0.9364,
      "step": 80
    },
    {
      "epoch": 2.71,
      "learning_rate": 0.0001,
      "loss": 0.955,
      "step": 85
    },
    {
      "epoch": 2.87,
      "learning_rate": 0.0001,
      "loss": 0.9752,
      "step": 90
    },
    {
      "epoch": 3.03,
      "learning_rate": 0.0001,
      "loss": 0.9614,
      "step": 95
    },
    {
      "epoch": 3.19,
      "learning_rate": 0.0001,
      "loss": 0.8711,
      "step": 100
    },
    {
      "epoch": 3.35,
      "learning_rate": 0.0001,
      "loss": 0.8728,
      "step": 105
    },
    {
      "epoch": 3.51,
      "learning_rate": 0.0001,
      "loss": 0.8734,
      "step": 110
    },
    {
      "epoch": 3.67,
      "learning_rate": 0.0001,
      "loss": 0.8769,
      "step": 115
    },
    {
      "epoch": 3.83,
      "learning_rate": 0.0001,
      "loss": 0.8714,
      "step": 120
    },
    {
      "epoch": 3.99,
      "learning_rate": 0.0001,
      "loss": 0.8694,
      "step": 125
    },
    {
      "epoch": 4.15,
      "learning_rate": 0.0001,
      "loss": 0.8008,
      "step": 130
    },
    {
      "epoch": 4.31,
      "learning_rate": 0.0001,
      "loss": 0.8044,
      "step": 135
    },
    {
      "epoch": 4.47,
      "learning_rate": 0.0001,
      "loss": 0.7945,
      "step": 140
    },
    {
      "epoch": 4.63,
      "learning_rate": 0.0001,
      "loss": 0.7927,
      "step": 145
    },
    {
      "epoch": 4.79,
      "learning_rate": 0.0001,
      "loss": 0.7982,
      "step": 150
    },
    {
      "epoch": 4.95,
      "learning_rate": 0.0001,
      "loss": 0.7921,
      "step": 155
    },
    {
      "epoch": 5.1,
      "learning_rate": 0.0001,
      "loss": 0.7751,
      "step": 160
    },
    {
      "epoch": 5.26,
      "learning_rate": 0.0001,
      "loss": 0.7197,
      "step": 165
    },
    {
      "epoch": 5.42,
      "learning_rate": 0.0001,
      "loss": 0.7381,
      "step": 170
    },
    {
      "epoch": 5.58,
      "learning_rate": 0.0001,
      "loss": 0.7328,
      "step": 175
    },
    {
      "epoch": 5.74,
      "learning_rate": 0.0001,
      "loss": 0.7325,
      "step": 180
    },
    {
      "epoch": 5.9,
      "learning_rate": 0.0001,
      "loss": 0.7492,
      "step": 185
    },
    {
      "epoch": 6.06,
      "learning_rate": 0.0001,
      "loss": 0.7042,
      "step": 190
    },
    {
      "epoch": 6.22,
      "learning_rate": 0.0001,
      "loss": 0.6687,
      "step": 195
    },
    {
      "epoch": 6.38,
      "learning_rate": 0.0001,
      "loss": 0.6617,
      "step": 200
    },
    {
      "epoch": 6.54,
      "learning_rate": 0.0001,
      "loss": 0.6895,
      "step": 205
    },
    {
      "epoch": 6.7,
      "learning_rate": 0.0001,
      "loss": 0.6763,
      "step": 210
    },
    {
      "epoch": 6.86,
      "learning_rate": 0.0001,
      "loss": 0.6825,
      "step": 215
    },
    {
      "epoch": 7.02,
      "learning_rate": 0.0001,
      "loss": 0.6862,
      "step": 220
    },
    {
      "epoch": 7.18,
      "learning_rate": 0.0001,
      "loss": 0.6176,
      "step": 225
    },
    {
      "epoch": 7.34,
      "learning_rate": 0.0001,
      "loss": 0.6072,
      "step": 230
    },
    {
      "epoch": 7.5,
      "learning_rate": 0.0001,
      "loss": 0.6289,
      "step": 235
    },
    {
      "epoch": 7.66,
      "learning_rate": 0.0001,
      "loss": 0.6223,
      "step": 240
    },
    {
      "epoch": 7.82,
      "learning_rate": 0.0001,
      "loss": 0.6358,
      "step": 245
    },
    {
      "epoch": 7.98,
      "learning_rate": 0.0001,
      "loss": 0.6365,
      "step": 250
    },
    {
      "epoch": 8.14,
      "learning_rate": 0.0001,
      "loss": 0.5835,
      "step": 255
    },
    {
      "epoch": 8.3,
      "learning_rate": 0.0001,
      "loss": 0.5829,
      "step": 260
    },
    {
      "epoch": 8.45,
      "learning_rate": 0.0001,
      "loss": 0.5816,
      "step": 265
    },
    {
      "epoch": 8.61,
      "learning_rate": 0.0001,
      "loss": 0.5595,
      "step": 270
    },
    {
      "epoch": 8.77,
      "learning_rate": 0.0001,
      "loss": 0.5902,
      "step": 275
    },
    {
      "epoch": 8.93,
      "learning_rate": 0.0001,
      "loss": 0.5717,
      "step": 280
    },
    {
      "epoch": 9.09,
      "learning_rate": 0.0001,
      "loss": 0.5584,
      "step": 285
    },
    {
      "epoch": 9.25,
      "learning_rate": 0.0001,
      "loss": 0.538,
      "step": 290
    },
    {
      "epoch": 9.41,
      "learning_rate": 0.0001,
      "loss": 0.5172,
      "step": 295
    },
    {
      "epoch": 9.57,
      "learning_rate": 0.0001,
      "loss": 0.5378,
      "step": 300
    },
    {
      "epoch": 9.73,
      "learning_rate": 0.0001,
      "loss": 0.5319,
      "step": 305
    },
    {
      "epoch": 9.89,
      "learning_rate": 0.0001,
      "loss": 0.529,
      "step": 310
    },
    {
      "epoch": 10.05,
      "learning_rate": 0.0001,
      "loss": 0.5236,
      "step": 315
    },
    {
      "epoch": 10.21,
      "learning_rate": 0.0001,
      "loss": 0.4818,
      "step": 320
    },
    {
      "epoch": 10.37,
      "learning_rate": 0.0001,
      "loss": 0.4856,
      "step": 325
    },
    {
      "epoch": 10.53,
      "learning_rate": 0.0001,
      "loss": 0.502,
      "step": 330
    },
    {
      "epoch": 10.69,
      "learning_rate": 0.0001,
      "loss": 0.482,
      "step": 335
    },
    {
      "epoch": 10.85,
      "learning_rate": 0.0001,
      "loss": 0.4957,
      "step": 340
    },
    {
      "epoch": 11.01,
      "learning_rate": 0.0001,
      "loss": 0.503,
      "step": 345
    },
    {
      "epoch": 11.17,
      "learning_rate": 0.0001,
      "loss": 0.4476,
      "step": 350
    },
    {
      "epoch": 11.33,
      "learning_rate": 0.0001,
      "loss": 0.4525,
      "step": 355
    },
    {
      "epoch": 11.49,
      "learning_rate": 0.0001,
      "loss": 0.4533,
      "step": 360
    },
    {
      "epoch": 11.65,
      "learning_rate": 0.0001,
      "loss": 0.4623,
      "step": 365
    },
    {
      "epoch": 11.8,
      "learning_rate": 0.0001,
      "loss": 0.459,
      "step": 370
    },
    {
      "epoch": 11.96,
      "learning_rate": 0.0001,
      "loss": 0.4563,
      "step": 375
    },
    {
      "epoch": 12.12,
      "learning_rate": 0.0001,
      "loss": 0.4408,
      "step": 380
    },
    {
      "epoch": 12.28,
      "learning_rate": 0.0001,
      "loss": 0.4123,
      "step": 385
    },
    {
      "epoch": 12.44,
      "learning_rate": 0.0001,
      "loss": 0.4203,
      "step": 390
    },
    {
      "epoch": 12.6,
      "learning_rate": 0.0001,
      "loss": 0.4283,
      "step": 395
    },
    {
      "epoch": 12.76,
      "learning_rate": 0.0001,
      "loss": 0.4144,
      "step": 400
    },
    {
      "epoch": 12.92,
      "learning_rate": 0.0001,
      "loss": 0.4282,
      "step": 405
    },
    {
      "epoch": 13.08,
      "learning_rate": 0.0001,
      "loss": 0.4062,
      "step": 410
    },
    {
      "epoch": 13.24,
      "learning_rate": 0.0001,
      "loss": 0.393,
      "step": 415
    },
    {
      "epoch": 13.4,
      "learning_rate": 0.0001,
      "loss": 0.3828,
      "step": 420
    },
    {
      "epoch": 13.56,
      "learning_rate": 0.0001,
      "loss": 0.3871,
      "step": 425
    },
    {
      "epoch": 13.72,
      "learning_rate": 0.0001,
      "loss": 0.3957,
      "step": 430
    },
    {
      "epoch": 13.88,
      "learning_rate": 0.0001,
      "loss": 0.3931,
      "step": 435
    },
    {
      "epoch": 14.04,
      "learning_rate": 0.0001,
      "loss": 0.389,
      "step": 440
    },
    {
      "epoch": 14.2,
      "learning_rate": 0.0001,
      "loss": 0.3529,
      "step": 445
    },
    {
      "epoch": 14.36,
      "learning_rate": 0.0001,
      "loss": 0.3639,
      "step": 450
    },
    {
      "epoch": 14.52,
      "learning_rate": 0.0001,
      "loss": 0.3665,
      "step": 455
    },
    {
      "epoch": 14.68,
      "learning_rate": 0.0001,
      "loss": 0.3609,
      "step": 460
    },
    {
      "epoch": 14.84,
      "learning_rate": 0.0001,
      "loss": 0.3642,
      "step": 465
    },
    {
      "epoch": 15.0,
      "learning_rate": 0.0001,
      "loss": 0.3693,
      "step": 470
    },
    {
      "epoch": 15.15,
      "learning_rate": 0.0001,
      "loss": 0.3382,
      "step": 475
    },
    {
      "epoch": 15.31,
      "learning_rate": 0.0001,
      "loss": 0.3344,
      "step": 480
    },
    {
      "epoch": 15.47,
      "learning_rate": 0.0001,
      "loss": 0.3379,
      "step": 485
    },
    {
      "epoch": 15.63,
      "learning_rate": 0.0001,
      "loss": 0.3307,
      "step": 490
    },
    {
      "epoch": 15.79,
      "learning_rate": 0.0001,
      "loss": 0.3474,
      "step": 495
    },
    {
      "epoch": 15.95,
      "learning_rate": 0.0001,
      "loss": 0.3348,
      "step": 500
    },
    {
      "epoch": 16.11,
      "learning_rate": 0.0001,
      "loss": 0.3228,
      "step": 505
    },
    {
      "epoch": 16.27,
      "learning_rate": 0.0001,
      "loss": 0.3164,
      "step": 510
    },
    {
      "epoch": 16.43,
      "learning_rate": 0.0001,
      "loss": 0.3078,
      "step": 515
    },
    {
      "epoch": 16.59,
      "learning_rate": 0.0001,
      "loss": 0.3135,
      "step": 520
    },
    {
      "epoch": 16.75,
      "learning_rate": 0.0001,
      "loss": 0.3091,
      "step": 525
    },
    {
      "epoch": 16.91,
      "learning_rate": 0.0001,
      "loss": 0.3229,
      "step": 530
    },
    {
      "epoch": 17.07,
      "learning_rate": 0.0001,
      "loss": 0.3003,
      "step": 535
    },
    {
      "epoch": 17.23,
      "learning_rate": 0.0001,
      "loss": 0.2832,
      "step": 540
    },
    {
      "epoch": 17.39,
      "learning_rate": 0.0001,
      "loss": 0.2936,
      "step": 545
    },
    {
      "epoch": 17.55,
      "learning_rate": 0.0001,
      "loss": 0.2881,
      "step": 550
    },
    {
      "epoch": 17.71,
      "learning_rate": 0.0001,
      "loss": 0.2858,
      "step": 555
    },
    {
      "epoch": 17.87,
      "learning_rate": 0.0001,
      "loss": 0.2987,
      "step": 560
    },
    {
      "epoch": 18.03,
      "learning_rate": 0.0001,
      "loss": 0.2986,
      "step": 565
    },
    {
      "epoch": 18.19,
      "learning_rate": 0.0001,
      "loss": 0.2624,
      "step": 570
    },
    {
      "epoch": 18.34,
      "learning_rate": 0.0001,
      "loss": 0.2668,
      "step": 575
    },
    {
      "epoch": 18.5,
      "learning_rate": 0.0001,
      "loss": 0.2701,
      "step": 580
    },
    {
      "epoch": 18.66,
      "learning_rate": 0.0001,
      "loss": 0.2759,
      "step": 585
    },
    {
      "epoch": 18.82,
      "learning_rate": 0.0001,
      "loss": 0.273,
      "step": 590
    },
    {
      "epoch": 18.98,
      "learning_rate": 0.0001,
      "loss": 0.2776,
      "step": 595
    },
    {
      "epoch": 19.14,
      "learning_rate": 0.0001,
      "loss": 0.256,
      "step": 600
    },
    {
      "epoch": 19.3,
      "learning_rate": 0.0001,
      "loss": 0.2494,
      "step": 605
    },
    {
      "epoch": 19.46,
      "learning_rate": 0.0001,
      "loss": 0.2507,
      "step": 610
    },
    {
      "epoch": 19.62,
      "learning_rate": 0.0001,
      "loss": 0.2541,
      "step": 615
    },
    {
      "epoch": 19.78,
      "learning_rate": 0.0001,
      "loss": 0.2513,
      "step": 620
    },
    {
      "epoch": 19.94,
      "learning_rate": 0.0001,
      "loss": 0.2579,
      "step": 625
    },
    {
      "epoch": 20.1,
      "learning_rate": 0.0001,
      "loss": 0.2421,
      "step": 630
    },
    {
      "epoch": 20.26,
      "learning_rate": 0.0001,
      "loss": 0.2348,
      "step": 635
    },
    {
      "epoch": 20.42,
      "learning_rate": 0.0001,
      "loss": 0.2366,
      "step": 640
    },
    {
      "epoch": 20.58,
      "learning_rate": 0.0001,
      "loss": 0.2306,
      "step": 645
    },
    {
      "epoch": 20.74,
      "learning_rate": 0.0001,
      "loss": 0.2356,
      "step": 650
    },
    {
      "epoch": 20.9,
      "learning_rate": 0.0001,
      "loss": 0.2407,
      "step": 655
    },
    {
      "epoch": 21.06,
      "learning_rate": 0.0001,
      "loss": 0.2328,
      "step": 660
    },
    {
      "epoch": 21.22,
      "learning_rate": 0.0001,
      "loss": 0.2183,
      "step": 665
    },
    {
      "epoch": 21.38,
      "learning_rate": 0.0001,
      "loss": 0.2121,
      "step": 670
    },
    {
      "epoch": 21.54,
      "learning_rate": 0.0001,
      "loss": 0.2186,
      "step": 675
    },
    {
      "epoch": 21.69,
      "learning_rate": 0.0001,
      "loss": 0.22,
      "step": 680
    },
    {
      "epoch": 21.85,
      "learning_rate": 0.0001,
      "loss": 0.2266,
      "step": 685
    },
    {
      "epoch": 22.01,
      "learning_rate": 0.0001,
      "loss": 0.2245,
      "step": 690
    },
    {
      "epoch": 22.17,
      "learning_rate": 0.0001,
      "loss": 0.202,
      "step": 695
    },
    {
      "epoch": 22.33,
      "learning_rate": 0.0001,
      "loss": 0.1993,
      "step": 700
    },
    {
      "epoch": 22.49,
      "learning_rate": 0.0001,
      "loss": 0.2056,
      "step": 705
    },
    {
      "epoch": 22.65,
      "learning_rate": 0.0001,
      "loss": 0.2076,
      "step": 710
    },
    {
      "epoch": 22.81,
      "learning_rate": 0.0001,
      "loss": 0.208,
      "step": 715
    },
    {
      "epoch": 22.97,
      "learning_rate": 0.0001,
      "loss": 0.2134,
      "step": 720
    },
    {
      "epoch": 23.13,
      "learning_rate": 0.0001,
      "loss": 0.1929,
      "step": 725
    },
    {
      "epoch": 23.29,
      "learning_rate": 0.0001,
      "loss": 0.19,
      "step": 730
    },
    {
      "epoch": 23.45,
      "learning_rate": 0.0001,
      "loss": 0.191,
      "step": 735
    },
    {
      "epoch": 23.61,
      "learning_rate": 0.0001,
      "loss": 0.1923,
      "step": 740
    },
    {
      "epoch": 23.77,
      "learning_rate": 0.0001,
      "loss": 0.193,
      "step": 745
    },
    {
      "epoch": 23.93,
      "learning_rate": 0.0001,
      "loss": 0.1978,
      "step": 750
    },
    {
      "epoch": 24.09,
      "learning_rate": 0.0001,
      "loss": 0.1898,
      "step": 755
    },
    {
      "epoch": 24.25,
      "learning_rate": 0.0001,
      "loss": 0.1785,
      "step": 760
    },
    {
      "epoch": 24.41,
      "learning_rate": 0.0001,
      "loss": 0.1815,
      "step": 765
    },
    {
      "epoch": 24.57,
      "learning_rate": 0.0001,
      "loss": 0.1854,
      "step": 770
    },
    {
      "epoch": 24.73,
      "learning_rate": 0.0001,
      "loss": 0.1796,
      "step": 775
    },
    {
      "epoch": 24.89,
      "learning_rate": 0.0001,
      "loss": 0.1797,
      "step": 780
    },
    {
      "epoch": 25.04,
      "learning_rate": 0.0001,
      "loss": 0.1779,
      "step": 785
    },
    {
      "epoch": 25.2,
      "learning_rate": 0.0001,
      "loss": 0.1689,
      "step": 790
    },
    {
      "epoch": 25.36,
      "learning_rate": 0.0001,
      "loss": 0.1642,
      "step": 795
    },
    {
      "epoch": 25.52,
      "learning_rate": 0.0001,
      "loss": 0.1697,
      "step": 800
    },
    {
      "epoch": 25.68,
      "learning_rate": 0.0001,
      "loss": 0.1713,
      "step": 805
    },
    {
      "epoch": 25.84,
      "learning_rate": 0.0001,
      "loss": 0.175,
      "step": 810
    },
    {
      "epoch": 26.0,
      "learning_rate": 0.0001,
      "loss": 0.1736,
      "step": 815
    },
    {
      "epoch": 26.16,
      "learning_rate": 0.0001,
      "loss": 0.1598,
      "step": 820
    },
    {
      "epoch": 26.32,
      "learning_rate": 0.0001,
      "loss": 0.156,
      "step": 825
    },
    {
      "epoch": 26.48,
      "learning_rate": 0.0001,
      "loss": 0.1605,
      "step": 830
    },
    {
      "epoch": 26.64,
      "learning_rate": 0.0001,
      "loss": 0.1595,
      "step": 835
    },
    {
      "epoch": 26.8,
      "learning_rate": 0.0001,
      "loss": 0.1613,
      "step": 840
    },
    {
      "epoch": 26.96,
      "learning_rate": 0.0001,
      "loss": 0.1636,
      "step": 845
    },
    {
      "epoch": 27.12,
      "learning_rate": 0.0001,
      "loss": 0.1526,
      "step": 850
    },
    {
      "epoch": 27.28,
      "learning_rate": 0.0001,
      "loss": 0.1506,
      "step": 855
    },
    {
      "epoch": 27.44,
      "learning_rate": 0.0001,
      "loss": 0.1492,
      "step": 860
    },
    {
      "epoch": 27.6,
      "learning_rate": 0.0001,
      "loss": 0.1486,
      "step": 865
    },
    {
      "epoch": 27.76,
      "learning_rate": 0.0001,
      "loss": 0.1524,
      "step": 870
    },
    {
      "epoch": 27.92,
      "learning_rate": 0.0001,
      "loss": 0.1538,
      "step": 875
    },
    {
      "epoch": 28.08,
      "learning_rate": 0.0001,
      "loss": 0.1495,
      "step": 880
    },
    {
      "epoch": 28.24,
      "learning_rate": 0.0001,
      "loss": 0.1436,
      "step": 885
    },
    {
      "epoch": 28.39,
      "learning_rate": 0.0001,
      "loss": 0.1387,
      "step": 890
    },
    {
      "epoch": 28.55,
      "learning_rate": 0.0001,
      "loss": 0.1419,
      "step": 895
    },
    {
      "epoch": 28.71,
      "learning_rate": 0.0001,
      "loss": 0.144,
      "step": 900
    },
    {
      "epoch": 28.87,
      "learning_rate": 0.0001,
      "loss": 0.1421,
      "step": 905
    },
    {
      "epoch": 29.03,
      "learning_rate": 0.0001,
      "loss": 0.144,
      "step": 910
    },
    {
      "epoch": 29.19,
      "learning_rate": 0.0001,
      "loss": 0.1336,
      "step": 915
    },
    {
      "epoch": 29.35,
      "learning_rate": 0.0001,
      "loss": 0.1342,
      "step": 920
    },
    {
      "epoch": 29.51,
      "learning_rate": 0.0001,
      "loss": 0.1315,
      "step": 925
    },
    {
      "epoch": 29.67,
      "learning_rate": 0.0001,
      "loss": 0.134,
      "step": 930
    },
    {
      "epoch": 29.83,
      "learning_rate": 0.0001,
      "loss": 0.1385,
      "step": 935
    },
    {
      "epoch": 29.99,
      "learning_rate": 0.0001,
      "loss": 0.1396,
      "step": 940
    },
    {
      "epoch": 30.15,
      "learning_rate": 0.0001,
      "loss": 0.1261,
      "step": 945
    },
    {
      "epoch": 30.31,
      "learning_rate": 0.0001,
      "loss": 0.1254,
      "step": 950
    },
    {
      "epoch": 30.47,
      "learning_rate": 0.0001,
      "loss": 0.1274,
      "step": 955
    },
    {
      "epoch": 30.63,
      "learning_rate": 0.0001,
      "loss": 0.1273,
      "step": 960
    },
    {
      "epoch": 30.79,
      "learning_rate": 0.0001,
      "loss": 0.1328,
      "step": 965
    },
    {
      "epoch": 30.95,
      "learning_rate": 0.0001,
      "loss": 0.1305,
      "step": 970
    },
    {
      "epoch": 31.11,
      "learning_rate": 0.0001,
      "loss": 0.1215,
      "step": 975
    },
    {
      "epoch": 31.27,
      "learning_rate": 0.0001,
      "loss": 0.1196,
      "step": 980
    },
    {
      "epoch": 31.43,
      "learning_rate": 0.0001,
      "loss": 0.1209,
      "step": 985
    },
    {
      "epoch": 31.59,
      "learning_rate": 0.0001,
      "loss": 0.1211,
      "step": 990
    },
    {
      "epoch": 31.74,
      "learning_rate": 0.0001,
      "loss": 0.125,
      "step": 995
    },
    {
      "epoch": 31.9,
      "learning_rate": 0.0001,
      "loss": 0.1262,
      "step": 1000
    },
    {
      "epoch": 32.06,
      "learning_rate": 0.0001,
      "loss": 0.1185,
      "step": 1005
    },
    {
      "epoch": 32.22,
      "learning_rate": 0.0001,
      "loss": 0.113,
      "step": 1010
    },
    {
      "epoch": 32.38,
      "learning_rate": 0.0001,
      "loss": 0.1136,
      "step": 1015
    },
    {
      "epoch": 32.54,
      "learning_rate": 0.0001,
      "loss": 0.1144,
      "step": 1020
    },
    {
      "epoch": 32.7,
      "learning_rate": 0.0001,
      "loss": 0.1147,
      "step": 1025
    },
    {
      "epoch": 32.86,
      "learning_rate": 0.0001,
      "loss": 0.1168,
      "step": 1030
    },
    {
      "epoch": 33.02,
      "learning_rate": 0.0001,
      "loss": 0.1179,
      "step": 1035
    },
    {
      "epoch": 33.18,
      "learning_rate": 0.0001,
      "loss": 0.1073,
      "step": 1040
    },
    {
      "epoch": 33.34,
      "learning_rate": 0.0001,
      "loss": 0.1091,
      "step": 1045
    },
    {
      "epoch": 33.5,
      "learning_rate": 0.0001,
      "loss": 0.1116,
      "step": 1050
    },
    {
      "epoch": 33.66,
      "learning_rate": 0.0001,
      "loss": 0.1092,
      "step": 1055
    },
    {
      "epoch": 33.82,
      "learning_rate": 0.0001,
      "loss": 0.1107,
      "step": 1060
    },
    {
      "epoch": 33.98,
      "learning_rate": 0.0001,
      "loss": 0.1094,
      "step": 1065
    },
    {
      "epoch": 34.14,
      "learning_rate": 0.0001,
      "loss": 0.1033,
      "step": 1070
    },
    {
      "epoch": 34.3,
      "learning_rate": 0.0001,
      "loss": 0.1027,
      "step": 1075
    },
    {
      "epoch": 34.46,
      "learning_rate": 0.0001,
      "loss": 0.1021,
      "step": 1080
    },
    {
      "epoch": 34.62,
      "learning_rate": 0.0001,
      "loss": 0.1045,
      "step": 1085
    },
    {
      "epoch": 34.78,
      "learning_rate": 0.0001,
      "loss": 0.1044,
      "step": 1090
    },
    {
      "epoch": 34.94,
      "learning_rate": 0.0001,
      "loss": 0.1081,
      "step": 1095
    },
    {
      "epoch": 35.09,
      "learning_rate": 0.0001,
      "loss": 0.1028,
      "step": 1100
    },
    {
      "epoch": 35.25,
      "learning_rate": 0.0001,
      "loss": 0.0967,
      "step": 1105
    },
    {
      "epoch": 35.41,
      "learning_rate": 0.0001,
      "loss": 0.0967,
      "step": 1110
    },
    {
      "epoch": 35.57,
      "learning_rate": 0.0001,
      "loss": 0.1019,
      "step": 1115
    },
    {
      "epoch": 35.73,
      "learning_rate": 0.0001,
      "loss": 0.1005,
      "step": 1120
    },
    {
      "epoch": 35.89,
      "learning_rate": 0.0001,
      "loss": 0.1031,
      "step": 1125
    },
    {
      "epoch": 36.05,
      "learning_rate": 0.0001,
      "loss": 0.0981,
      "step": 1130
    },
    {
      "epoch": 36.21,
      "learning_rate": 0.0001,
      "loss": 0.0945,
      "step": 1135
    },
    {
      "epoch": 36.37,
      "learning_rate": 0.0001,
      "loss": 0.0954,
      "step": 1140
    },
    {
      "epoch": 36.53,
      "learning_rate": 0.0001,
      "loss": 0.0936,
      "step": 1145
    },
    {
      "epoch": 36.69,
      "learning_rate": 0.0001,
      "loss": 0.0972,
      "step": 1150
    },
    {
      "epoch": 36.85,
      "learning_rate": 0.0001,
      "loss": 0.0951,
      "step": 1155
    },
    {
      "epoch": 37.01,
      "learning_rate": 0.0001,
      "loss": 0.0974,
      "step": 1160
    },
    {
      "epoch": 37.17,
      "learning_rate": 0.0001,
      "loss": 0.0902,
      "step": 1165
    },
    {
      "epoch": 37.33,
      "learning_rate": 0.0001,
      "loss": 0.0909,
      "step": 1170
    },
    {
      "epoch": 37.49,
      "learning_rate": 0.0001,
      "loss": 0.0918,
      "step": 1175
    },
    {
      "epoch": 37.65,
      "learning_rate": 0.0001,
      "loss": 0.0913,
      "step": 1180
    },
    {
      "epoch": 37.81,
      "learning_rate": 0.0001,
      "loss": 0.0926,
      "step": 1185
    },
    {
      "epoch": 37.97,
      "learning_rate": 0.0001,
      "loss": 0.0925,
      "step": 1190
    },
    {
      "epoch": 38.13,
      "learning_rate": 0.0001,
      "loss": 0.0882,
      "step": 1195
    },
    {
      "epoch": 38.29,
      "learning_rate": 0.0001,
      "loss": 0.0856,
      "step": 1200
    },
    {
      "epoch": 38.44,
      "learning_rate": 0.0001,
      "loss": 0.0881,
      "step": 1205
    },
    {
      "epoch": 38.6,
      "learning_rate": 0.0001,
      "loss": 0.0889,
      "step": 1210
    },
    {
      "epoch": 38.76,
      "learning_rate": 0.0001,
      "loss": 0.0863,
      "step": 1215
    },
    {
      "epoch": 38.92,
      "learning_rate": 0.0001,
      "loss": 0.087,
      "step": 1220
    },
    {
      "epoch": 39.08,
      "learning_rate": 0.0001,
      "loss": 0.0851,
      "step": 1225
    },
    {
      "epoch": 39.24,
      "learning_rate": 0.0001,
      "loss": 0.0819,
      "step": 1230
    },
    {
      "epoch": 39.4,
      "learning_rate": 0.0001,
      "loss": 0.0844,
      "step": 1235
    },
    {
      "epoch": 39.56,
      "learning_rate": 0.0001,
      "loss": 0.0826,
      "step": 1240
    },
    {
      "epoch": 39.72,
      "learning_rate": 0.0001,
      "loss": 0.0832,
      "step": 1245
    },
    {
      "epoch": 39.88,
      "learning_rate": 0.0001,
      "loss": 0.0838,
      "step": 1250
    },
    {
      "epoch": 40.04,
      "learning_rate": 0.0001,
      "loss": 0.087,
      "step": 1255
    },
    {
      "epoch": 40.2,
      "learning_rate": 0.0001,
      "loss": 0.0792,
      "step": 1260
    },
    {
      "epoch": 40.36,
      "learning_rate": 0.0001,
      "loss": 0.0802,
      "step": 1265
    },
    {
      "epoch": 40.52,
      "learning_rate": 0.0001,
      "loss": 0.0806,
      "step": 1270
    },
    {
      "epoch": 40.68,
      "learning_rate": 0.0001,
      "loss": 0.0768,
      "step": 1275
    },
    {
      "epoch": 40.84,
      "learning_rate": 0.0001,
      "loss": 0.0812,
      "step": 1280
    },
    {
      "epoch": 41.0,
      "learning_rate": 0.0001,
      "loss": 0.0849,
      "step": 1285
    },
    {
      "epoch": 41.16,
      "learning_rate": 0.0001,
      "loss": 0.0767,
      "step": 1290
    },
    {
      "epoch": 41.32,
      "learning_rate": 0.0001,
      "loss": 0.0766,
      "step": 1295
    },
    {
      "epoch": 41.48,
      "learning_rate": 0.0001,
      "loss": 0.0774,
      "step": 1300
    },
    {
      "epoch": 41.64,
      "learning_rate": 0.0001,
      "loss": 0.0789,
      "step": 1305
    },
    {
      "epoch": 41.79,
      "learning_rate": 0.0001,
      "loss": 0.0774,
      "step": 1310
    },
    {
      "epoch": 41.95,
      "learning_rate": 0.0001,
      "loss": 0.0795,
      "step": 1315
    },
    {
      "epoch": 42.11,
      "learning_rate": 0.0001,
      "loss": 0.0748,
      "step": 1320
    },
    {
      "epoch": 42.27,
      "learning_rate": 0.0001,
      "loss": 0.0728,
      "step": 1325
    },
    {
      "epoch": 42.43,
      "learning_rate": 0.0001,
      "loss": 0.0751,
      "step": 1330
    },
    {
      "epoch": 42.59,
      "learning_rate": 0.0001,
      "loss": 0.0765,
      "step": 1335
    },
    {
      "epoch": 42.75,
      "learning_rate": 0.0001,
      "loss": 0.0741,
      "step": 1340
    },
    {
      "epoch": 42.91,
      "learning_rate": 0.0001,
      "loss": 0.0761,
      "step": 1345
    },
    {
      "epoch": 43.07,
      "learning_rate": 0.0001,
      "loss": 0.0752,
      "step": 1350
    },
    {
      "epoch": 43.23,
      "learning_rate": 0.0001,
      "loss": 0.0715,
      "step": 1355
    },
    {
      "epoch": 43.39,
      "learning_rate": 0.0001,
      "loss": 0.0734,
      "step": 1360
    },
    {
      "epoch": 43.55,
      "learning_rate": 0.0001,
      "loss": 0.0727,
      "step": 1365
    },
    {
      "epoch": 43.71,
      "learning_rate": 0.0001,
      "loss": 0.0726,
      "step": 1370
    },
    {
      "epoch": 43.87,
      "learning_rate": 0.0001,
      "loss": 0.0731,
      "step": 1375
    },
    {
      "epoch": 44.03,
      "learning_rate": 0.0001,
      "loss": 0.0706,
      "step": 1380
    },
    {
      "epoch": 44.19,
      "learning_rate": 0.0001,
      "loss": 0.0684,
      "step": 1385
    },
    {
      "epoch": 44.35,
      "learning_rate": 0.0001,
      "loss": 0.0695,
      "step": 1390
    },
    {
      "epoch": 44.51,
      "learning_rate": 0.0001,
      "loss": 0.0691,
      "step": 1395
    },
    {
      "epoch": 44.67,
      "learning_rate": 0.0001,
      "loss": 0.0711,
      "step": 1400
    },
    {
      "epoch": 44.83,
      "learning_rate": 0.0001,
      "loss": 0.07,
      "step": 1405
    },
    {
      "epoch": 44.99,
      "learning_rate": 0.0001,
      "loss": 0.071,
      "step": 1410
    },
    {
      "epoch": 45.14,
      "learning_rate": 0.0001,
      "loss": 0.0663,
      "step": 1415
    },
    {
      "epoch": 45.3,
      "learning_rate": 0.0001,
      "loss": 0.0663,
      "step": 1420
    },
    {
      "epoch": 45.46,
      "learning_rate": 0.0001,
      "loss": 0.0667,
      "step": 1425
    },
    {
      "epoch": 45.62,
      "learning_rate": 0.0001,
      "loss": 0.0694,
      "step": 1430
    },
    {
      "epoch": 45.78,
      "learning_rate": 0.0001,
      "loss": 0.0677,
      "step": 1435
    },
    {
      "epoch": 45.94,
      "learning_rate": 0.0001,
      "loss": 0.0683,
      "step": 1440
    },
    {
      "epoch": 46.1,
      "learning_rate": 0.0001,
      "loss": 0.065,
      "step": 1445
    },
    {
      "epoch": 46.26,
      "learning_rate": 0.0001,
      "loss": 0.0642,
      "step": 1450
    },
    {
      "epoch": 46.42,
      "learning_rate": 0.0001,
      "loss": 0.066,
      "step": 1455
    },
    {
      "epoch": 46.58,
      "learning_rate": 0.0001,
      "loss": 0.0644,
      "step": 1460
    },
    {
      "epoch": 46.74,
      "learning_rate": 0.0001,
      "loss": 0.0657,
      "step": 1465
    },
    {
      "epoch": 46.9,
      "learning_rate": 0.0001,
      "loss": 0.066,
      "step": 1470
    },
    {
      "epoch": 47.06,
      "learning_rate": 0.0001,
      "loss": 0.0648,
      "step": 1475
    },
    {
      "epoch": 47.22,
      "learning_rate": 0.0001,
      "loss": 0.0614,
      "step": 1480
    },
    {
      "epoch": 47.38,
      "learning_rate": 0.0001,
      "loss": 0.063,
      "step": 1485
    },
    {
      "epoch": 47.54,
      "learning_rate": 0.0001,
      "loss": 0.0651,
      "step": 1490
    },
    {
      "epoch": 47.7,
      "learning_rate": 0.0001,
      "loss": 0.0635,
      "step": 1495
    },
    {
      "epoch": 47.86,
      "learning_rate": 0.0001,
      "loss": 0.0637,
      "step": 1500
    },
    {
      "epoch": 48.02,
      "learning_rate": 0.0001,
      "loss": 0.0627,
      "step": 1505
    },
    {
      "epoch": 48.18,
      "learning_rate": 0.0001,
      "loss": 0.0602,
      "step": 1510
    },
    {
      "epoch": 48.33,
      "learning_rate": 0.0001,
      "loss": 0.0598,
      "step": 1515
    },
    {
      "epoch": 48.49,
      "learning_rate": 0.0001,
      "loss": 0.06,
      "step": 1520
    },
    {
      "epoch": 48.65,
      "learning_rate": 0.0001,
      "loss": 0.0607,
      "step": 1525
    },
    {
      "epoch": 48.81,
      "learning_rate": 0.0001,
      "loss": 0.063,
      "step": 1530
    },
    {
      "epoch": 48.97,
      "learning_rate": 0.0001,
      "loss": 0.0628,
      "step": 1535
    },
    {
      "epoch": 49.13,
      "learning_rate": 0.0001,
      "loss": 0.0584,
      "step": 1540
    },
    {
      "epoch": 49.29,
      "learning_rate": 0.0001,
      "loss": 0.0585,
      "step": 1545
    },
    {
      "epoch": 49.45,
      "learning_rate": 0.0001,
      "loss": 0.0599,
      "step": 1550
    },
    {
      "epoch": 49.61,
      "learning_rate": 0.0001,
      "loss": 0.0583,
      "step": 1555
    },
    {
      "epoch": 49.77,
      "learning_rate": 0.0001,
      "loss": 0.0601,
      "step": 1560
    },
    {
      "epoch": 49.93,
      "learning_rate": 0.0001,
      "loss": 0.0596,
      "step": 1565
    },
    {
      "epoch": 50.09,
      "learning_rate": 0.0001,
      "loss": 0.0588,
      "step": 1570
    },
    {
      "epoch": 50.25,
      "learning_rate": 0.0001,
      "loss": 0.0576,
      "step": 1575
    },
    {
      "epoch": 50.41,
      "learning_rate": 0.0001,
      "loss": 0.0564,
      "step": 1580
    },
    {
      "epoch": 50.57,
      "learning_rate": 0.0001,
      "loss": 0.0581,
      "step": 1585
    },
    {
      "epoch": 50.73,
      "learning_rate": 0.0001,
      "loss": 0.0583,
      "step": 1590
    },
    {
      "epoch": 50.89,
      "learning_rate": 0.0001,
      "loss": 0.0568,
      "step": 1595
    },
    {
      "epoch": 51.05,
      "learning_rate": 0.0001,
      "loss": 0.0586,
      "step": 1600
    },
    {
      "epoch": 51.21,
      "learning_rate": 0.0001,
      "loss": 0.0544,
      "step": 1605
    },
    {
      "epoch": 51.37,
      "learning_rate": 0.0001,
      "loss": 0.0553,
      "step": 1610
    },
    {
      "epoch": 51.53,
      "learning_rate": 0.0001,
      "loss": 0.0559,
      "step": 1615
    },
    {
      "epoch": 51.68,
      "learning_rate": 0.0001,
      "loss": 0.0553,
      "step": 1620
    },
    {
      "epoch": 51.84,
      "learning_rate": 0.0001,
      "loss": 0.0566,
      "step": 1625
    },
    {
      "epoch": 52.0,
      "learning_rate": 0.0001,
      "loss": 0.0576,
      "step": 1630
    },
    {
      "epoch": 52.16,
      "learning_rate": 0.0001,
      "loss": 0.0545,
      "step": 1635
    },
    {
      "epoch": 52.32,
      "learning_rate": 0.0001,
      "loss": 0.0552,
      "step": 1640
    },
    {
      "epoch": 52.48,
      "learning_rate": 0.0001,
      "loss": 0.0534,
      "step": 1645
    },
    {
      "epoch": 52.64,
      "learning_rate": 0.0001,
      "loss": 0.0535,
      "step": 1650
    },
    {
      "epoch": 52.8,
      "learning_rate": 0.0001,
      "loss": 0.0545,
      "step": 1655
    },
    {
      "epoch": 52.96,
      "learning_rate": 0.0001,
      "loss": 0.055,
      "step": 1660
    },
    {
      "epoch": 53.12,
      "learning_rate": 0.0001,
      "loss": 0.0526,
      "step": 1665
    },
    {
      "epoch": 53.28,
      "learning_rate": 0.0001,
      "loss": 0.0527,
      "step": 1670
    },
    {
      "epoch": 53.44,
      "learning_rate": 0.0001,
      "loss": 0.0521,
      "step": 1675
    },
    {
      "epoch": 53.6,
      "learning_rate": 0.0001,
      "loss": 0.0529,
      "step": 1680
    },
    {
      "epoch": 53.76,
      "learning_rate": 0.0001,
      "loss": 0.0531,
      "step": 1685
    },
    {
      "epoch": 53.92,
      "learning_rate": 0.0001,
      "loss": 0.0532,
      "step": 1690
    },
    {
      "epoch": 54.08,
      "learning_rate": 0.0001,
      "loss": 0.0524,
      "step": 1695
    },
    {
      "epoch": 54.24,
      "learning_rate": 0.0001,
      "loss": 0.0516,
      "step": 1700
    },
    {
      "epoch": 54.4,
      "learning_rate": 0.0001,
      "loss": 0.0496,
      "step": 1705
    },
    {
      "epoch": 54.56,
      "learning_rate": 0.0001,
      "loss": 0.0517,
      "step": 1710
    },
    {
      "epoch": 54.72,
      "learning_rate": 0.0001,
      "loss": 0.0528,
      "step": 1715
    },
    {
      "epoch": 54.88,
      "learning_rate": 0.0001,
      "loss": 0.0509,
      "step": 1720
    },
    {
      "epoch": 55.03,
      "learning_rate": 0.0001,
"loss": 0.0503, | |
"step": 1725 | |
}, | |
{ | |
"epoch": 55.19, | |
"learning_rate": 0.0001, | |
"loss": 0.0493, | |
"step": 1730 | |
}, | |
{ | |
"epoch": 55.35, | |
"learning_rate": 0.0001, | |
"loss": 0.0492, | |
"step": 1735 | |
}, | |
{ | |
"epoch": 55.51, | |
"learning_rate": 0.0001, | |
"loss": 0.0495, | |
"step": 1740 | |
}, | |
{ | |
"epoch": 55.67, | |
"learning_rate": 0.0001, | |
"loss": 0.0491, | |
"step": 1745 | |
}, | |
{ | |
"epoch": 55.83, | |
"learning_rate": 0.0001, | |
"loss": 0.0503, | |
"step": 1750 | |
}, | |
{ | |
"epoch": 55.99, | |
"learning_rate": 0.0001, | |
"loss": 0.0516, | |
"step": 1755 | |
}, | |
{ | |
"epoch": 56.15, | |
"learning_rate": 0.0001, | |
"loss": 0.0475, | |
"step": 1760 | |
}, | |
{ | |
"epoch": 56.31, | |
"learning_rate": 0.0001, | |
"loss": 0.0493, | |
"step": 1765 | |
}, | |
{ | |
"epoch": 56.47, | |
"learning_rate": 0.0001, | |
"loss": 0.0493, | |
"step": 1770 | |
}, | |
{ | |
"epoch": 56.63, | |
"learning_rate": 0.0001, | |
"loss": 0.0484, | |
"step": 1775 | |
}, | |
{ | |
"epoch": 56.79, | |
"learning_rate": 0.0001, | |
"loss": 0.0479, | |
"step": 1780 | |
}, | |
{ | |
"epoch": 56.95, | |
"learning_rate": 0.0001, | |
"loss": 0.0488, | |
"step": 1785 | |
}, | |
{ | |
"epoch": 57.11, | |
"learning_rate": 0.0001, | |
"loss": 0.0468, | |
"step": 1790 | |
}, | |
{ | |
"epoch": 57.27, | |
"learning_rate": 0.0001, | |
"loss": 0.0479, | |
"step": 1795 | |
}, | |
{ | |
"epoch": 57.43, | |
"learning_rate": 0.0001, | |
"loss": 0.0462, | |
"step": 1800 | |
}, | |
{ | |
"epoch": 57.59, | |
"learning_rate": 0.0001, | |
"loss": 0.0488, | |
"step": 1805 | |
}, | |
{ | |
"epoch": 57.75, | |
"learning_rate": 0.0001, | |
"loss": 0.0488, | |
"step": 1810 | |
}, | |
{ | |
"epoch": 57.91, | |
"learning_rate": 0.0001, | |
"loss": 0.048, | |
"step": 1815 | |
}, | |
{ | |
"epoch": 58.07, | |
"learning_rate": 0.0001, | |
"loss": 0.0474, | |
"step": 1820 | |
}, | |
{ | |
"epoch": 58.23, | |
"learning_rate": 0.0001, | |
"loss": 0.0469, | |
"step": 1825 | |
}, | |
{ | |
"epoch": 58.38, | |
"learning_rate": 0.0001, | |
"loss": 0.0456, | |
"step": 1830 | |
}, | |
{ | |
"epoch": 58.54, | |
"learning_rate": 0.0001, | |
"loss": 0.0465, | |
"step": 1835 | |
}, | |
{ | |
"epoch": 58.7, | |
"learning_rate": 0.0001, | |
"loss": 0.0475, | |
"step": 1840 | |
}, | |
{ | |
"epoch": 58.86, | |
"learning_rate": 0.0001, | |
"loss": 0.0469, | |
"step": 1845 | |
}, | |
{ | |
"epoch": 59.02, | |
"learning_rate": 0.0001, | |
"loss": 0.0472, | |
"step": 1850 | |
}, | |
{ | |
"epoch": 59.18, | |
"learning_rate": 0.0001, | |
"loss": 0.0453, | |
"step": 1855 | |
}, | |
{ | |
"epoch": 59.34, | |
"learning_rate": 0.0001, | |
"loss": 0.0464, | |
"step": 1860 | |
}, | |
{ | |
"epoch": 59.5, | |
"learning_rate": 0.0001, | |
"loss": 0.0456, | |
"step": 1865 | |
}, | |
{ | |
"epoch": 59.66, | |
"learning_rate": 0.0001, | |
"loss": 0.0463, | |
"step": 1870 | |
}, | |
{ | |
"epoch": 59.82, | |
"learning_rate": 0.0001, | |
"loss": 0.0467, | |
"step": 1875 | |
}, | |
{ | |
"epoch": 59.98, | |
"learning_rate": 0.0001, | |
"loss": 0.046, | |
"step": 1880 | |
}, | |
{ | |
"epoch": 60.14, | |
"learning_rate": 0.0001, | |
"loss": 0.0444, | |
"step": 1885 | |
}, | |
{ | |
"epoch": 60.3, | |
"learning_rate": 0.0001, | |
"loss": 0.0448, | |
"step": 1890 | |
}, | |
{ | |
"epoch": 60.46, | |
"learning_rate": 0.0001, | |
"loss": 0.0441, | |
"step": 1895 | |
}, | |
{ | |
"epoch": 60.62, | |
"learning_rate": 0.0001, | |
"loss": 0.0468, | |
"step": 1900 | |
}, | |
{ | |
"epoch": 60.78, | |
"learning_rate": 0.0001, | |
"loss": 0.045, | |
"step": 1905 | |
}, | |
{ | |
"epoch": 60.94, | |
"learning_rate": 0.0001, | |
"loss": 0.0454, | |
"step": 1910 | |
}, | |
{ | |
"epoch": 61.1, | |
"learning_rate": 0.0001, | |
"loss": 0.0454, | |
"step": 1915 | |
}, | |
{ | |
"epoch": 61.26, | |
"learning_rate": 0.0001, | |
"loss": 0.044, | |
"step": 1920 | |
}, | |
{ | |
"epoch": 61.42, | |
"learning_rate": 0.0001, | |
"loss": 0.0451, | |
"step": 1925 | |
}, | |
{ | |
"epoch": 61.58, | |
"learning_rate": 0.0001, | |
"loss": 0.0436, | |
"step": 1930 | |
}, | |
{ | |
"epoch": 61.73, | |
"learning_rate": 0.0001, | |
"loss": 0.044, | |
"step": 1935 | |
}, | |
{ | |
"epoch": 61.89, | |
"learning_rate": 0.0001, | |
"loss": 0.0444, | |
"step": 1940 | |
}, | |
{ | |
"epoch": 62.05, | |
"learning_rate": 0.0001, | |
"loss": 0.043, | |
"step": 1945 | |
}, | |
{ | |
"epoch": 62.21, | |
"learning_rate": 0.0001, | |
"loss": 0.0414, | |
"step": 1950 | |
}, | |
{ | |
"epoch": 62.37, | |
"learning_rate": 0.0001, | |
"loss": 0.0417, | |
"step": 1955 | |
}, | |
{ | |
"epoch": 62.53, | |
"learning_rate": 0.0001, | |
"loss": 0.0433, | |
"step": 1960 | |
}, | |
{ | |
"epoch": 62.69, | |
"learning_rate": 0.0001, | |
"loss": 0.0437, | |
"step": 1965 | |
}, | |
{ | |
"epoch": 62.85, | |
"learning_rate": 0.0001, | |
"loss": 0.0428, | |
"step": 1970 | |
}, | |
{ | |
"epoch": 63.01, | |
"learning_rate": 0.0001, | |
"loss": 0.0437, | |
"step": 1975 | |
}, | |
{ | |
"epoch": 63.17, | |
"learning_rate": 0.0001, | |
"loss": 0.042, | |
"step": 1980 | |
}, | |
{ | |
"epoch": 63.33, | |
"learning_rate": 0.0001, | |
"loss": 0.0413, | |
"step": 1985 | |
}, | |
{ | |
"epoch": 63.49, | |
"learning_rate": 0.0001, | |
"loss": 0.0426, | |
"step": 1990 | |
}, | |
{ | |
"epoch": 63.65, | |
"learning_rate": 0.0001, | |
"loss": 0.042, | |
"step": 1995 | |
}, | |
{ | |
"epoch": 63.81, | |
"learning_rate": 0.0001, | |
"loss": 0.0434, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 63.97, | |
"learning_rate": 0.0001, | |
"loss": 0.0429, | |
"step": 2005 | |
}, | |
{ | |
"epoch": 64.13, | |
"learning_rate": 0.0001, | |
"loss": 0.0421, | |
"step": 2010 | |
}, | |
{ | |
"epoch": 64.29, | |
"learning_rate": 0.0001, | |
"loss": 0.0411, | |
"step": 2015 | |
}, | |
{ | |
"epoch": 64.45, | |
"learning_rate": 0.0001, | |
"loss": 0.0415, | |
"step": 2020 | |
}, | |
{ | |
"epoch": 64.61, | |
"learning_rate": 0.0001, | |
"loss": 0.0401, | |
"step": 2025 | |
}, | |
{ | |
"epoch": 64.77, | |
"learning_rate": 0.0001, | |
"loss": 0.0417, | |
"step": 2030 | |
}, | |
{ | |
"epoch": 64.93, | |
"learning_rate": 0.0001, | |
"loss": 0.0418, | |
"step": 2035 | |
}, | |
{ | |
"epoch": 65.08, | |
"learning_rate": 0.0001, | |
"loss": 0.0409, | |
"step": 2040 | |
}, | |
{ | |
"epoch": 65.24, | |
"learning_rate": 0.0001, | |
"loss": 0.0404, | |
"step": 2045 | |
}, | |
{ | |
"epoch": 65.4, | |
"learning_rate": 0.0001, | |
"loss": 0.0404, | |
"step": 2050 | |
}, | |
{ | |
"epoch": 65.56, | |
"learning_rate": 0.0001, | |
"loss": 0.0403, | |
"step": 2055 | |
}, | |
{ | |
"epoch": 65.72, | |
"learning_rate": 0.0001, | |
"loss": 0.0399, | |
"step": 2060 | |
}, | |
{ | |
"epoch": 65.88, | |
"learning_rate": 0.0001, | |
"loss": 0.042, | |
"step": 2065 | |
}, | |
{ | |
"epoch": 66.04, | |
"learning_rate": 0.0001, | |
"loss": 0.0397, | |
"step": 2070 | |
}, | |
{ | |
"epoch": 66.2, | |
"learning_rate": 0.0001, | |
"loss": 0.0396, | |
"step": 2075 | |
}, | |
{ | |
"epoch": 66.36, | |
"learning_rate": 0.0001, | |
"loss": 0.0387, | |
"step": 2080 | |
}, | |
{ | |
"epoch": 66.52, | |
"learning_rate": 0.0001, | |
"loss": 0.0392, | |
"step": 2085 | |
}, | |
{ | |
"epoch": 66.68, | |
"learning_rate": 0.0001, | |
"loss": 0.0407, | |
"step": 2090 | |
}, | |
{ | |
"epoch": 66.84, | |
"learning_rate": 0.0001, | |
"loss": 0.0414, | |
"step": 2095 | |
}, | |
{ | |
"epoch": 67.0, | |
"learning_rate": 0.0001, | |
"loss": 0.0407, | |
"step": 2100 | |
}, | |
{ | |
"epoch": 67.16, | |
"learning_rate": 0.0001, | |
"loss": 0.0404, | |
"step": 2105 | |
}, | |
{ | |
"epoch": 67.32, | |
"learning_rate": 0.0001, | |
"loss": 0.0382, | |
"step": 2110 | |
}, | |
{ | |
"epoch": 67.48, | |
"learning_rate": 0.0001, | |
"loss": 0.0397, | |
"step": 2115 | |
}, | |
{ | |
"epoch": 67.64, | |
"learning_rate": 0.0001, | |
"loss": 0.0392, | |
"step": 2120 | |
}, | |
{ | |
"epoch": 67.8, | |
"learning_rate": 0.0001, | |
"loss": 0.0383, | |
"step": 2125 | |
}, | |
{ | |
"epoch": 67.96, | |
"learning_rate": 0.0001, | |
"loss": 0.0397, | |
"step": 2130 | |
}, | |
{ | |
"epoch": 68.12, | |
"learning_rate": 0.0001, | |
"loss": 0.0384, | |
"step": 2135 | |
}, | |
{ | |
"epoch": 68.28, | |
"learning_rate": 0.0001, | |
"loss": 0.0376, | |
"step": 2140 | |
}, | |
{ | |
"epoch": 68.43, | |
"learning_rate": 0.0001, | |
"loss": 0.0383, | |
"step": 2145 | |
}, | |
{ | |
"epoch": 68.59, | |
"learning_rate": 0.0001, | |
"loss": 0.0393, | |
"step": 2150 | |
}, | |
{ | |
"epoch": 68.75, | |
"learning_rate": 0.0001, | |
"loss": 0.0387, | |
"step": 2155 | |
}, | |
{ | |
"epoch": 68.91, | |
"learning_rate": 0.0001, | |
"loss": 0.0382, | |
"step": 2160 | |
}, | |
{ | |
"epoch": 69.07, | |
"learning_rate": 0.0001, | |
"loss": 0.0382, | |
"step": 2165 | |
}, | |
{ | |
"epoch": 69.23, | |
"learning_rate": 0.0001, | |
"loss": 0.0376, | |
"step": 2170 | |
}, | |
{ | |
"epoch": 69.39, | |
"learning_rate": 0.0001, | |
"loss": 0.0375, | |
"step": 2175 | |
}, | |
{ | |
"epoch": 69.55, | |
"learning_rate": 0.0001, | |
"loss": 0.0382, | |
"step": 2180 | |
}, | |
{ | |
"epoch": 69.71, | |
"learning_rate": 0.0001, | |
"loss": 0.0386, | |
"step": 2185 | |
}, | |
{ | |
"epoch": 69.87, | |
"learning_rate": 0.0001, | |
"loss": 0.0373, | |
"step": 2190 | |
}, | |
{ | |
"epoch": 70.03, | |
"learning_rate": 0.0001, | |
"loss": 0.0376, | |
"step": 2195 | |
}, | |
{ | |
"epoch": 70.19, | |
"learning_rate": 0.0001, | |
"loss": 0.037, | |
"step": 2200 | |
}, | |
{ | |
"epoch": 70.35, | |
"learning_rate": 0.0001, | |
"loss": 0.0368, | |
"step": 2205 | |
}, | |
{ | |
"epoch": 70.51, | |
"learning_rate": 0.0001, | |
"loss": 0.036, | |
"step": 2210 | |
}, | |
{ | |
"epoch": 70.67, | |
"learning_rate": 0.0001, | |
"loss": 0.0375, | |
"step": 2215 | |
}, | |
{ | |
"epoch": 70.83, | |
"learning_rate": 0.0001, | |
"loss": 0.0368, | |
"step": 2220 | |
}, | |
{ | |
"epoch": 70.99, | |
"learning_rate": 0.0001, | |
"loss": 0.038, | |
"step": 2225 | |
}, | |
{ | |
"epoch": 71.15, | |
"learning_rate": 0.0001, | |
"loss": 0.0368, | |
"step": 2230 | |
}, | |
{ | |
"epoch": 71.31, | |
"learning_rate": 0.0001, | |
"loss": 0.036, | |
"step": 2235 | |
}, | |
{ | |
"epoch": 71.47, | |
"learning_rate": 0.0001, | |
"loss": 0.0367, | |
"step": 2240 | |
}, | |
{ | |
"epoch": 71.63, | |
"learning_rate": 0.0001, | |
"loss": 0.0358, | |
"step": 2245 | |
}, | |
{ | |
"epoch": 71.78, | |
"learning_rate": 0.0001, | |
"loss": 0.0363, | |
"step": 2250 | |
}, | |
{ | |
"epoch": 71.94, | |
"learning_rate": 0.0001, | |
"loss": 0.0366, | |
"step": 2255 | |
}, | |
{ | |
"epoch": 72.1, | |
"learning_rate": 0.0001, | |
"loss": 0.0356, | |
"step": 2260 | |
}, | |
{ | |
"epoch": 72.26, | |
"learning_rate": 0.0001, | |
"loss": 0.0344, | |
"step": 2265 | |
}, | |
{ | |
"epoch": 72.42, | |
"learning_rate": 0.0001, | |
"loss": 0.0366, | |
"step": 2270 | |
}, | |
{ | |
"epoch": 72.58, | |
"learning_rate": 0.0001, | |
"loss": 0.0362, | |
"step": 2275 | |
}, | |
{ | |
"epoch": 72.74, | |
"learning_rate": 0.0001, | |
"loss": 0.0354, | |
"step": 2280 | |
}, | |
{ | |
"epoch": 72.9, | |
"learning_rate": 0.0001, | |
"loss": 0.0362, | |
"step": 2285 | |
}, | |
{ | |
"epoch": 73.06, | |
"learning_rate": 0.0001, | |
"loss": 0.0358, | |
"step": 2290 | |
}, | |
{ | |
"epoch": 73.22, | |
"learning_rate": 0.0001, | |
"loss": 0.035, | |
"step": 2295 | |
}, | |
{ | |
"epoch": 73.38, | |
"learning_rate": 0.0001, | |
"loss": 0.0354, | |
"step": 2300 | |
}, | |
{ | |
"epoch": 73.54, | |
"learning_rate": 0.0001, | |
"loss": 0.0357, | |
"step": 2305 | |
}, | |
{ | |
"epoch": 73.7, | |
"learning_rate": 0.0001, | |
"loss": 0.0344, | |
"step": 2310 | |
}, | |
{ | |
"epoch": 73.86, | |
"learning_rate": 0.0001, | |
"loss": 0.0348, | |
"step": 2315 | |
}, | |
{ | |
"epoch": 74.02, | |
"learning_rate": 0.0001, | |
"loss": 0.0351, | |
"step": 2320 | |
}, | |
{ | |
"epoch": 74.18, | |
"learning_rate": 0.0001, | |
"loss": 0.0339, | |
"step": 2325 | |
}, | |
{ | |
"epoch": 74.18, | |
"step": 2325, | |
"total_flos": 2.773380910546944e+17, | |
"train_loss": 0.22404980731266802, | |
"train_runtime": 24311.8759, | |
"train_samples_per_second": 3.094, | |
"train_steps_per_second": 0.096 | |
} | |
], | |
"logging_steps": 5, | |
"max_steps": 2325, | |
"num_train_epochs": 75, | |
"save_steps": -2325, | |
"total_flos": 2.773380910546944e+17, | |
"trial_name": null, | |
"trial_params": null | |
} | |