|
{ |
|
"best_metric": 0.9869447350502014, |
|
"best_model_checkpoint": "./output_c/checkpoint-842163", |
|
"epoch": 50.0, |
|
"global_step": 859350, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.960016291383022e-05, |
|
"loss": 2.1017, |
|
"step": 17186 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.6622060537338257, |
|
"eval_runtime": 41.4466, |
|
"eval_samples_per_second": 1657.917, |
|
"eval_steps_per_second": 51.826, |
|
"step": 17187 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.920034910106476e-05, |
|
"loss": 1.684, |
|
"step": 34372 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.5238641500473022, |
|
"eval_runtime": 41.103, |
|
"eval_samples_per_second": 1671.774, |
|
"eval_steps_per_second": 52.259, |
|
"step": 34374 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.8800558561703613e-05, |
|
"loss": 1.5681, |
|
"step": 51558 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.4425514936447144, |
|
"eval_runtime": 40.7311, |
|
"eval_samples_per_second": 1687.039, |
|
"eval_steps_per_second": 52.736, |
|
"step": 51561 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.840076802234247e-05, |
|
"loss": 1.495, |
|
"step": 68744 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.3773192167282104, |
|
"eval_runtime": 40.0387, |
|
"eval_samples_per_second": 1716.214, |
|
"eval_steps_per_second": 53.648, |
|
"step": 68748 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.8001000756385642e-05, |
|
"loss": 1.4413, |
|
"step": 85930 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.3373253345489502, |
|
"eval_runtime": 40.4299, |
|
"eval_samples_per_second": 1699.61, |
|
"eval_steps_per_second": 53.129, |
|
"step": 85935 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.7601186943620177e-05, |
|
"loss": 1.3994, |
|
"step": 103116 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.3044638633728027, |
|
"eval_runtime": 40.8305, |
|
"eval_samples_per_second": 1682.934, |
|
"eval_steps_per_second": 52.608, |
|
"step": 103122 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.7201396404259034e-05, |
|
"loss": 1.3648, |
|
"step": 120302 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.2776345014572144, |
|
"eval_runtime": 40.5929, |
|
"eval_samples_per_second": 1692.784, |
|
"eval_steps_per_second": 52.916, |
|
"step": 120309 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 1.680160586489789e-05, |
|
"loss": 1.3356, |
|
"step": 137488 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.2525078058242798, |
|
"eval_runtime": 41.1415, |
|
"eval_samples_per_second": 1670.213, |
|
"eval_steps_per_second": 52.21, |
|
"step": 137496 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 1.640179205213243e-05, |
|
"loss": 1.3103, |
|
"step": 154674 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.229679822921753, |
|
"eval_runtime": 40.8721, |
|
"eval_samples_per_second": 1681.22, |
|
"eval_steps_per_second": 52.554, |
|
"step": 154683 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 1.6002001512771283e-05, |
|
"loss": 1.287, |
|
"step": 171860 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.212567925453186, |
|
"eval_runtime": 39.8522, |
|
"eval_samples_per_second": 1724.248, |
|
"eval_steps_per_second": 53.899, |
|
"step": 171870 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 1.560218770000582e-05, |
|
"loss": 1.2684, |
|
"step": 189046 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 1.194634199142456, |
|
"eval_runtime": 39.8234, |
|
"eval_samples_per_second": 1725.494, |
|
"eval_steps_per_second": 53.938, |
|
"step": 189057 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 1.5202373887240358e-05, |
|
"loss": 1.2503, |
|
"step": 206232 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 1.181823968887329, |
|
"eval_runtime": 40.2477, |
|
"eval_samples_per_second": 1707.301, |
|
"eval_steps_per_second": 53.369, |
|
"step": 206244 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 1.4802606621283528e-05, |
|
"loss": 1.2338, |
|
"step": 223418 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 1.1666418313980103, |
|
"eval_runtime": 40.5887, |
|
"eval_samples_per_second": 1692.959, |
|
"eval_steps_per_second": 52.921, |
|
"step": 223431 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 1.4402816081922383e-05, |
|
"loss": 1.2175, |
|
"step": 240604 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 1.1534117460250854, |
|
"eval_runtime": 40.4201, |
|
"eval_samples_per_second": 1700.022, |
|
"eval_steps_per_second": 53.142, |
|
"step": 240618 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 1.4003048815965557e-05, |
|
"loss": 1.2051, |
|
"step": 257790 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 1.1395701169967651, |
|
"eval_runtime": 40.5078, |
|
"eval_samples_per_second": 1696.341, |
|
"eval_steps_per_second": 53.027, |
|
"step": 257805 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 1.3603258276604412e-05, |
|
"loss": 1.1938, |
|
"step": 274976 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 1.1312499046325684, |
|
"eval_runtime": 40.784, |
|
"eval_samples_per_second": 1684.852, |
|
"eval_steps_per_second": 52.668, |
|
"step": 274992 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 1.3203491010647584e-05, |
|
"loss": 1.182, |
|
"step": 292162 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 1.1207791566848755, |
|
"eval_runtime": 40.735, |
|
"eval_samples_per_second": 1686.879, |
|
"eval_steps_per_second": 52.731, |
|
"step": 292179 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 1.2803700471286437e-05, |
|
"loss": 1.1706, |
|
"step": 309348 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 1.11408269405365, |
|
"eval_runtime": 40.5382, |
|
"eval_samples_per_second": 1695.07, |
|
"eval_steps_per_second": 52.987, |
|
"step": 309366 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 1.2403886658520976e-05, |
|
"loss": 1.1597, |
|
"step": 326534 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 1.1045416593551636, |
|
"eval_runtime": 41.1519, |
|
"eval_samples_per_second": 1669.791, |
|
"eval_steps_per_second": 52.197, |
|
"step": 326553 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 1.2004096119159831e-05, |
|
"loss": 1.1519, |
|
"step": 343720 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 1.0968821048736572, |
|
"eval_runtime": 40.942, |
|
"eval_samples_per_second": 1678.35, |
|
"eval_steps_per_second": 52.464, |
|
"step": 343740 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 1.1604305579798688e-05, |
|
"loss": 1.1436, |
|
"step": 360906 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 1.0863711833953857, |
|
"eval_runtime": 40.8058, |
|
"eval_samples_per_second": 1683.95, |
|
"eval_steps_per_second": 52.64, |
|
"step": 360927 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 1.120451504043754e-05, |
|
"loss": 1.1336, |
|
"step": 378092 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 1.082047939300537, |
|
"eval_runtime": 40.4735, |
|
"eval_samples_per_second": 1697.777, |
|
"eval_steps_per_second": 53.072, |
|
"step": 378114 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 1.0804701227672078e-05, |
|
"loss": 1.1265, |
|
"step": 395278 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 1.0744354724884033, |
|
"eval_runtime": 41.164, |
|
"eval_samples_per_second": 1669.298, |
|
"eval_steps_per_second": 52.182, |
|
"step": 395301 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 1.0404910688310935e-05, |
|
"loss": 1.119, |
|
"step": 412464 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 1.0701904296875, |
|
"eval_runtime": 40.4778, |
|
"eval_samples_per_second": 1697.599, |
|
"eval_steps_per_second": 53.066, |
|
"step": 412488 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 1.000512014894979e-05, |
|
"loss": 1.1117, |
|
"step": 429650 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 1.062601089477539, |
|
"eval_runtime": 40.2464, |
|
"eval_samples_per_second": 1707.357, |
|
"eval_steps_per_second": 53.371, |
|
"step": 429675 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 9.60535288299296e-06, |
|
"loss": 1.1048, |
|
"step": 446836 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 1.0572917461395264, |
|
"eval_runtime": 39.7303, |
|
"eval_samples_per_second": 1729.537, |
|
"eval_steps_per_second": 54.065, |
|
"step": 446862 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 9.205562343631815e-06, |
|
"loss": 1.098, |
|
"step": 464022 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 1.0520741939544678, |
|
"eval_runtime": 39.4819, |
|
"eval_samples_per_second": 1740.42, |
|
"eval_steps_per_second": 54.405, |
|
"step": 464049 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 8.80577180427067e-06, |
|
"loss": 1.0924, |
|
"step": 481208 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 1.0490448474884033, |
|
"eval_runtime": 39.5503, |
|
"eval_samples_per_second": 1737.408, |
|
"eval_steps_per_second": 54.311, |
|
"step": 481236 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 8.405981264909526e-06, |
|
"loss": 1.087, |
|
"step": 498394 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 1.0411999225616455, |
|
"eval_runtime": 39.4139, |
|
"eval_samples_per_second": 1743.421, |
|
"eval_steps_per_second": 54.499, |
|
"step": 498423 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 8.006213998952698e-06, |
|
"loss": 1.0812, |
|
"step": 515580 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 1.0427082777023315, |
|
"eval_runtime": 40.8396, |
|
"eval_samples_per_second": 1682.558, |
|
"eval_steps_per_second": 52.596, |
|
"step": 515610 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 7.606423459591552e-06, |
|
"loss": 1.076, |
|
"step": 532766 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 1.031008243560791, |
|
"eval_runtime": 39.0796, |
|
"eval_samples_per_second": 1758.334, |
|
"eval_steps_per_second": 54.965, |
|
"step": 532797 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 7.206632920230407e-06, |
|
"loss": 1.0707, |
|
"step": 549952 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 1.0325006246566772, |
|
"eval_runtime": 38.4227, |
|
"eval_samples_per_second": 1788.396, |
|
"eval_steps_per_second": 55.904, |
|
"step": 549984 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 6.806842380869262e-06, |
|
"loss": 1.0654, |
|
"step": 567138 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 1.021201252937317, |
|
"eval_runtime": 37.9747, |
|
"eval_samples_per_second": 1809.492, |
|
"eval_steps_per_second": 56.564, |
|
"step": 567171 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 6.407051841508117e-06, |
|
"loss": 1.0609, |
|
"step": 584324 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 1.0170767307281494, |
|
"eval_runtime": 38.0157, |
|
"eval_samples_per_second": 1807.544, |
|
"eval_steps_per_second": 56.503, |
|
"step": 584358 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 6.007238028742655e-06, |
|
"loss": 1.0572, |
|
"step": 601510 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 1.0200223922729492, |
|
"eval_runtime": 38.7209, |
|
"eval_samples_per_second": 1774.625, |
|
"eval_steps_per_second": 55.474, |
|
"step": 601545 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 5.607470762785827e-06, |
|
"loss": 1.0528, |
|
"step": 618696 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 1.0177444219589233, |
|
"eval_runtime": 38.1852, |
|
"eval_samples_per_second": 1799.519, |
|
"eval_steps_per_second": 56.252, |
|
"step": 618732 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 5.207656950020364e-06, |
|
"loss": 1.0485, |
|
"step": 635882 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 1.0121095180511475, |
|
"eval_runtime": 37.9666, |
|
"eval_samples_per_second": 1809.879, |
|
"eval_steps_per_second": 56.576, |
|
"step": 635919 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 4.807889684063537e-06, |
|
"loss": 1.0456, |
|
"step": 653068 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 1.0075100660324097, |
|
"eval_runtime": 37.7745, |
|
"eval_samples_per_second": 1819.084, |
|
"eval_steps_per_second": 56.864, |
|
"step": 653106 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 4.408075871298074e-06, |
|
"loss": 1.0436, |
|
"step": 670254 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 1.0085304975509644, |
|
"eval_runtime": 37.8436, |
|
"eval_samples_per_second": 1815.761, |
|
"eval_steps_per_second": 56.76, |
|
"step": 670293 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 4.008308605341247e-06, |
|
"loss": 1.0403, |
|
"step": 687440 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 1.0014568567276, |
|
"eval_runtime": 37.7063, |
|
"eval_samples_per_second": 1822.377, |
|
"eval_steps_per_second": 56.967, |
|
"step": 687480 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 3.608494792575784e-06, |
|
"loss": 1.0358, |
|
"step": 704626 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_loss": 0.9997268915176392, |
|
"eval_runtime": 37.9829, |
|
"eval_samples_per_second": 1809.103, |
|
"eval_steps_per_second": 56.552, |
|
"step": 704667 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 3.208704253214639e-06, |
|
"loss": 1.0339, |
|
"step": 721812 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 1.0009877681732178, |
|
"eval_runtime": 37.7238, |
|
"eval_samples_per_second": 1821.528, |
|
"eval_steps_per_second": 56.94, |
|
"step": 721854 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 2.808890440449177e-06, |
|
"loss": 1.0312, |
|
"step": 738998 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_loss": 0.9973294138908386, |
|
"eval_runtime": 37.9123, |
|
"eval_samples_per_second": 1812.472, |
|
"eval_steps_per_second": 56.657, |
|
"step": 739041 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 2.4090999010880316e-06, |
|
"loss": 1.0278, |
|
"step": 756184 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 0.9942373037338257, |
|
"eval_runtime": 37.8136, |
|
"eval_samples_per_second": 1817.205, |
|
"eval_steps_per_second": 56.805, |
|
"step": 756228 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 2.0093093617268868e-06, |
|
"loss": 1.0258, |
|
"step": 773370 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_loss": 0.9922175407409668, |
|
"eval_runtime": 37.7488, |
|
"eval_samples_per_second": 1820.324, |
|
"eval_steps_per_second": 56.903, |
|
"step": 773415 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 1.6094955489614245e-06, |
|
"loss": 1.024, |
|
"step": 790556 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 0.9902246594429016, |
|
"eval_runtime": 37.7011, |
|
"eval_samples_per_second": 1822.624, |
|
"eval_steps_per_second": 56.974, |
|
"step": 790602 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 1.2096817361959622e-06, |
|
"loss": 1.0213, |
|
"step": 807742 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_loss": 0.9919160008430481, |
|
"eval_runtime": 37.515, |
|
"eval_samples_per_second": 1831.667, |
|
"eval_steps_per_second": 57.257, |
|
"step": 807789 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 8.09891196834817e-07, |
|
"loss": 1.0202, |
|
"step": 824928 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 0.9896969199180603, |
|
"eval_runtime": 37.5208, |
|
"eval_samples_per_second": 1831.385, |
|
"eval_steps_per_second": 57.248, |
|
"step": 824976 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"learning_rate": 4.1007738406935476e-07, |
|
"loss": 1.0184, |
|
"step": 842114 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_loss": 0.9869447350502014, |
|
"eval_runtime": 37.5715, |
|
"eval_samples_per_second": 1828.912, |
|
"eval_steps_per_second": 57.171, |
|
"step": 842163 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 1.0286844708209695e-08, |
|
"loss": 1.0185, |
|
"step": 859300 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 0.9913281798362732, |
|
"eval_runtime": 38.0547, |
|
"eval_samples_per_second": 1805.692, |
|
"eval_steps_per_second": 56.445, |
|
"step": 859350 |
|
} |
|
], |
|
"max_steps": 859350, |
|
"num_train_epochs": 50, |
|
"total_flos": 9.113148657842688e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|