{ "best_metric": 0.9869447350502014, "best_model_checkpoint": "./output_c/checkpoint-842163", "epoch": 50.0, "global_step": 859350, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.960016291383022e-05, "loss": 2.1017, "step": 17186 }, { "epoch": 1.0, "eval_loss": 1.6622060537338257, "eval_runtime": 41.4466, "eval_samples_per_second": 1657.917, "eval_steps_per_second": 51.826, "step": 17187 }, { "epoch": 2.0, "learning_rate": 1.920034910106476e-05, "loss": 1.684, "step": 34372 }, { "epoch": 2.0, "eval_loss": 1.5238641500473022, "eval_runtime": 41.103, "eval_samples_per_second": 1671.774, "eval_steps_per_second": 52.259, "step": 34374 }, { "epoch": 3.0, "learning_rate": 1.8800558561703613e-05, "loss": 1.5681, "step": 51558 }, { "epoch": 3.0, "eval_loss": 1.4425514936447144, "eval_runtime": 40.7311, "eval_samples_per_second": 1687.039, "eval_steps_per_second": 52.736, "step": 51561 }, { "epoch": 4.0, "learning_rate": 1.840076802234247e-05, "loss": 1.495, "step": 68744 }, { "epoch": 4.0, "eval_loss": 1.3773192167282104, "eval_runtime": 40.0387, "eval_samples_per_second": 1716.214, "eval_steps_per_second": 53.648, "step": 68748 }, { "epoch": 5.0, "learning_rate": 1.8001000756385642e-05, "loss": 1.4413, "step": 85930 }, { "epoch": 5.0, "eval_loss": 1.3373253345489502, "eval_runtime": 40.4299, "eval_samples_per_second": 1699.61, "eval_steps_per_second": 53.129, "step": 85935 }, { "epoch": 6.0, "learning_rate": 1.7601186943620177e-05, "loss": 1.3994, "step": 103116 }, { "epoch": 6.0, "eval_loss": 1.3044638633728027, "eval_runtime": 40.8305, "eval_samples_per_second": 1682.934, "eval_steps_per_second": 52.608, "step": 103122 }, { "epoch": 7.0, "learning_rate": 1.7201396404259034e-05, "loss": 1.3648, "step": 120302 }, { "epoch": 7.0, "eval_loss": 1.2776345014572144, "eval_runtime": 40.5929, "eval_samples_per_second": 1692.784, "eval_steps_per_second": 52.916, "step": 120309 }, { "epoch": 8.0, "learning_rate": 1.680160586489789e-05, "loss": 1.3356, "step": 137488 }, { "epoch": 8.0, "eval_loss": 1.2525078058242798, "eval_runtime": 41.1415, "eval_samples_per_second": 1670.213, "eval_steps_per_second": 52.21, "step": 137496 }, { "epoch": 9.0, "learning_rate": 1.640179205213243e-05, "loss": 1.3103, "step": 154674 }, { "epoch": 9.0, "eval_loss": 1.229679822921753, "eval_runtime": 40.8721, "eval_samples_per_second": 1681.22, "eval_steps_per_second": 52.554, "step": 154683 }, { "epoch": 10.0, "learning_rate": 1.6002001512771283e-05, "loss": 1.287, "step": 171860 }, { "epoch": 10.0, "eval_loss": 1.212567925453186, "eval_runtime": 39.8522, "eval_samples_per_second": 1724.248, "eval_steps_per_second": 53.899, "step": 171870 }, { "epoch": 11.0, "learning_rate": 1.560218770000582e-05, "loss": 1.2684, "step": 189046 }, { "epoch": 11.0, "eval_loss": 1.194634199142456, "eval_runtime": 39.8234, "eval_samples_per_second": 1725.494, "eval_steps_per_second": 53.938, "step": 189057 }, { "epoch": 12.0, "learning_rate": 1.5202373887240358e-05, "loss": 1.2503, "step": 206232 }, { "epoch": 12.0, "eval_loss": 1.181823968887329, "eval_runtime": 40.2477, "eval_samples_per_second": 1707.301, "eval_steps_per_second": 53.369, "step": 206244 }, { "epoch": 13.0, "learning_rate": 1.4802606621283528e-05, "loss": 1.2338, "step": 223418 }, { "epoch": 13.0, "eval_loss": 1.1666418313980103, "eval_runtime": 40.5887, "eval_samples_per_second": 1692.959, "eval_steps_per_second": 52.921, "step": 223431 }, { "epoch": 14.0, "learning_rate": 1.4402816081922383e-05, "loss": 1.2175, "step": 240604 }, { "epoch": 14.0, "eval_loss": 1.1534117460250854, "eval_runtime": 40.4201, "eval_samples_per_second": 1700.022, "eval_steps_per_second": 53.142, "step": 240618 }, { "epoch": 15.0, "learning_rate": 1.4003048815965557e-05, "loss": 1.2051, "step": 257790 }, { "epoch": 15.0, "eval_loss": 1.1395701169967651, "eval_runtime": 40.5078, "eval_samples_per_second": 1696.341, "eval_steps_per_second": 53.027, "step": 257805 }, { "epoch": 16.0, "learning_rate": 1.3603258276604412e-05, "loss": 1.1938, "step": 274976 }, { "epoch": 16.0, "eval_loss": 1.1312499046325684, "eval_runtime": 40.784, "eval_samples_per_second": 1684.852, "eval_steps_per_second": 52.668, "step": 274992 }, { "epoch": 17.0, "learning_rate": 1.3203491010647584e-05, "loss": 1.182, "step": 292162 }, { "epoch": 17.0, "eval_loss": 1.1207791566848755, "eval_runtime": 40.735, "eval_samples_per_second": 1686.879, "eval_steps_per_second": 52.731, "step": 292179 }, { "epoch": 18.0, "learning_rate": 1.2803700471286437e-05, "loss": 1.1706, "step": 309348 }, { "epoch": 18.0, "eval_loss": 1.11408269405365, "eval_runtime": 40.5382, "eval_samples_per_second": 1695.07, "eval_steps_per_second": 52.987, "step": 309366 }, { "epoch": 19.0, "learning_rate": 1.2403886658520976e-05, "loss": 1.1597, "step": 326534 }, { "epoch": 19.0, "eval_loss": 1.1045416593551636, "eval_runtime": 41.1519, "eval_samples_per_second": 1669.791, "eval_steps_per_second": 52.197, "step": 326553 }, { "epoch": 20.0, "learning_rate": 1.2004096119159831e-05, "loss": 1.1519, "step": 343720 }, { "epoch": 20.0, "eval_loss": 1.0968821048736572, "eval_runtime": 40.942, "eval_samples_per_second": 1678.35, "eval_steps_per_second": 52.464, "step": 343740 }, { "epoch": 21.0, "learning_rate": 1.1604305579798688e-05, "loss": 1.1436, "step": 360906 }, { "epoch": 21.0, "eval_loss": 1.0863711833953857, "eval_runtime": 40.8058, "eval_samples_per_second": 1683.95, "eval_steps_per_second": 52.64, "step": 360927 }, { "epoch": 22.0, "learning_rate": 1.120451504043754e-05, "loss": 1.1336, "step": 378092 }, { "epoch": 22.0, "eval_loss": 1.082047939300537, "eval_runtime": 40.4735, "eval_samples_per_second": 1697.777, "eval_steps_per_second": 53.072, "step": 378114 }, { "epoch": 23.0, "learning_rate": 1.0804701227672078e-05, "loss": 1.1265, "step": 395278 }, { "epoch": 23.0, "eval_loss": 1.0744354724884033, "eval_runtime": 41.164, "eval_samples_per_second": 1669.298, "eval_steps_per_second": 52.182, "step": 395301 }, { "epoch": 24.0, "learning_rate": 1.0404910688310935e-05, "loss": 1.119, "step": 412464 }, { "epoch": 24.0, "eval_loss": 1.0701904296875, "eval_runtime": 40.4778, "eval_samples_per_second": 1697.599, "eval_steps_per_second": 53.066, "step": 412488 }, { "epoch": 25.0, "learning_rate": 1.000512014894979e-05, "loss": 1.1117, "step": 429650 }, { "epoch": 25.0, "eval_loss": 1.062601089477539, "eval_runtime": 40.2464, "eval_samples_per_second": 1707.357, "eval_steps_per_second": 53.371, "step": 429675 }, { "epoch": 26.0, "learning_rate": 9.60535288299296e-06, "loss": 1.1048, "step": 446836 }, { "epoch": 26.0, "eval_loss": 1.0572917461395264, "eval_runtime": 39.7303, "eval_samples_per_second": 1729.537, "eval_steps_per_second": 54.065, "step": 446862 }, { "epoch": 27.0, "learning_rate": 9.205562343631815e-06, "loss": 1.098, "step": 464022 }, { "epoch": 27.0, "eval_loss": 1.0520741939544678, "eval_runtime": 39.4819, "eval_samples_per_second": 1740.42, "eval_steps_per_second": 54.405, "step": 464049 }, { "epoch": 28.0, "learning_rate": 8.80577180427067e-06, "loss": 1.0924, "step": 481208 }, { "epoch": 28.0, "eval_loss": 1.0490448474884033, "eval_runtime": 39.5503, "eval_samples_per_second": 1737.408, "eval_steps_per_second": 54.311, "step": 481236 }, { "epoch": 29.0, "learning_rate": 8.405981264909526e-06, "loss": 1.087, "step": 498394 }, { "epoch": 29.0, "eval_loss": 1.0411999225616455, "eval_runtime": 39.4139, "eval_samples_per_second": 1743.421, "eval_steps_per_second": 54.499, "step": 498423 }, { "epoch": 30.0, "learning_rate": 8.006213998952698e-06, "loss": 1.0812, "step": 515580 }, { "epoch": 30.0, "eval_loss": 1.0427082777023315, "eval_runtime": 40.8396, "eval_samples_per_second": 1682.558, "eval_steps_per_second": 52.596, "step": 515610 }, { "epoch": 31.0, "learning_rate": 7.606423459591552e-06, "loss": 1.076, "step": 532766 }, { "epoch": 31.0, "eval_loss": 1.031008243560791, "eval_runtime": 39.0796, "eval_samples_per_second": 1758.334, "eval_steps_per_second": 54.965, "step": 532797 }, { "epoch": 32.0, "learning_rate": 7.206632920230407e-06, "loss": 1.0707, "step": 549952 }, { "epoch": 32.0, "eval_loss": 1.0325006246566772, "eval_runtime": 38.4227, "eval_samples_per_second": 1788.396, "eval_steps_per_second": 55.904, "step": 549984 }, { "epoch": 33.0, "learning_rate": 6.806842380869262e-06, "loss": 1.0654, "step": 567138 }, { "epoch": 33.0, "eval_loss": 1.021201252937317, "eval_runtime": 37.9747, "eval_samples_per_second": 1809.492, "eval_steps_per_second": 56.564, "step": 567171 }, { "epoch": 34.0, "learning_rate": 6.407051841508117e-06, "loss": 1.0609, "step": 584324 }, { "epoch": 34.0, "eval_loss": 1.0170767307281494, "eval_runtime": 38.0157, "eval_samples_per_second": 1807.544, "eval_steps_per_second": 56.503, "step": 584358 }, { "epoch": 35.0, "learning_rate": 6.007238028742655e-06, "loss": 1.0572, "step": 601510 }, { "epoch": 35.0, "eval_loss": 1.0200223922729492, "eval_runtime": 38.7209, "eval_samples_per_second": 1774.625, "eval_steps_per_second": 55.474, "step": 601545 }, { "epoch": 36.0, "learning_rate": 5.607470762785827e-06, "loss": 1.0528, "step": 618696 }, { "epoch": 36.0, "eval_loss": 1.0177444219589233, "eval_runtime": 38.1852, "eval_samples_per_second": 1799.519, "eval_steps_per_second": 56.252, "step": 618732 }, { "epoch": 37.0, "learning_rate": 5.207656950020364e-06, "loss": 1.0485, "step": 635882 }, { "epoch": 37.0, "eval_loss": 1.0121095180511475, "eval_runtime": 37.9666, "eval_samples_per_second": 1809.879, "eval_steps_per_second": 56.576, "step": 635919 }, { "epoch": 38.0, "learning_rate": 4.807889684063537e-06, "loss": 1.0456, "step": 653068 }, { "epoch": 38.0, "eval_loss": 1.0075100660324097, "eval_runtime": 37.7745, "eval_samples_per_second": 1819.084, "eval_steps_per_second": 56.864, "step": 653106 }, { "epoch": 39.0, "learning_rate": 4.408075871298074e-06, "loss": 1.0436, "step": 670254 }, { "epoch": 39.0, "eval_loss": 1.0085304975509644, "eval_runtime": 37.8436, "eval_samples_per_second": 1815.761, "eval_steps_per_second": 56.76, "step": 670293 }, { "epoch": 40.0, "learning_rate": 4.008308605341247e-06, "loss": 1.0403, "step": 687440 }, { "epoch": 40.0, "eval_loss": 1.0014568567276, "eval_runtime": 37.7063, "eval_samples_per_second": 1822.377, "eval_steps_per_second": 56.967, "step": 687480 }, { "epoch": 41.0, "learning_rate": 3.608494792575784e-06, "loss": 1.0358, "step": 704626 }, { "epoch": 41.0, "eval_loss": 0.9997268915176392, "eval_runtime": 37.9829, "eval_samples_per_second": 1809.103, "eval_steps_per_second": 56.552, "step": 704667 }, { "epoch": 42.0, "learning_rate": 3.208704253214639e-06, "loss": 1.0339, "step": 721812 }, { "epoch": 42.0, "eval_loss": 1.0009877681732178, "eval_runtime": 37.7238, "eval_samples_per_second": 1821.528, "eval_steps_per_second": 56.94, "step": 721854 }, { "epoch": 43.0, "learning_rate": 2.808890440449177e-06, "loss": 1.0312, "step": 738998 }, { "epoch": 43.0, "eval_loss": 0.9973294138908386, "eval_runtime": 37.9123, "eval_samples_per_second": 1812.472, "eval_steps_per_second": 56.657, "step": 739041 }, { "epoch": 44.0, "learning_rate": 2.4090999010880316e-06, "loss": 1.0278, "step": 756184 }, { "epoch": 44.0, "eval_loss": 0.9942373037338257, "eval_runtime": 37.8136, "eval_samples_per_second": 1817.205, "eval_steps_per_second": 56.805, "step": 756228 }, { "epoch": 45.0, "learning_rate": 2.0093093617268868e-06, "loss": 1.0258, "step": 773370 }, { "epoch": 45.0, "eval_loss": 0.9922175407409668, "eval_runtime": 37.7488, "eval_samples_per_second": 1820.324, "eval_steps_per_second": 56.903, "step": 773415 }, { "epoch": 46.0, "learning_rate": 1.6094955489614245e-06, "loss": 1.024, "step": 790556 }, { "epoch": 46.0, "eval_loss": 0.9902246594429016, "eval_runtime": 37.7011, "eval_samples_per_second": 1822.624, "eval_steps_per_second": 56.974, "step": 790602 }, { "epoch": 47.0, "learning_rate": 1.2096817361959622e-06, "loss": 1.0213, "step": 807742 }, { "epoch": 47.0, "eval_loss": 0.9919160008430481, "eval_runtime": 37.515, "eval_samples_per_second": 1831.667, "eval_steps_per_second": 57.257, "step": 807789 }, { "epoch": 48.0, "learning_rate": 8.09891196834817e-07, "loss": 1.0202, "step": 824928 }, { "epoch": 48.0, "eval_loss": 0.9896969199180603, "eval_runtime": 37.5208, "eval_samples_per_second": 1831.385, "eval_steps_per_second": 57.248, "step": 824976 }, { "epoch": 49.0, "learning_rate": 4.1007738406935476e-07, "loss": 1.0184, "step": 842114 }, { "epoch": 49.0, "eval_loss": 0.9869447350502014, "eval_runtime": 37.5715, "eval_samples_per_second": 1828.912, "eval_steps_per_second": 57.171, "step": 842163 }, { "epoch": 50.0, "learning_rate": 1.0286844708209695e-08, "loss": 1.0185, "step": 859300 }, { "epoch": 50.0, "eval_loss": 0.9913281798362732, "eval_runtime": 38.0547, "eval_samples_per_second": 1805.692, "eval_steps_per_second": 56.445, "step": 859350 } ], "max_steps": 859350, "num_train_epochs": 50, "total_flos": 9.113148657842688e+17, "trial_name": null, "trial_params": null }