edanigoben's picture
(16)
19a759f
{
"best_metric": 0.9869447350502014,
"best_model_checkpoint": "./output_c/checkpoint-842163",
"epoch": 50.0,
"global_step": 859350,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 1.960016291383022e-05,
"loss": 2.1017,
"step": 17186
},
{
"epoch": 1.0,
"eval_loss": 1.6622060537338257,
"eval_runtime": 41.4466,
"eval_samples_per_second": 1657.917,
"eval_steps_per_second": 51.826,
"step": 17187
},
{
"epoch": 2.0,
"learning_rate": 1.920034910106476e-05,
"loss": 1.684,
"step": 34372
},
{
"epoch": 2.0,
"eval_loss": 1.5238641500473022,
"eval_runtime": 41.103,
"eval_samples_per_second": 1671.774,
"eval_steps_per_second": 52.259,
"step": 34374
},
{
"epoch": 3.0,
"learning_rate": 1.8800558561703613e-05,
"loss": 1.5681,
"step": 51558
},
{
"epoch": 3.0,
"eval_loss": 1.4425514936447144,
"eval_runtime": 40.7311,
"eval_samples_per_second": 1687.039,
"eval_steps_per_second": 52.736,
"step": 51561
},
{
"epoch": 4.0,
"learning_rate": 1.840076802234247e-05,
"loss": 1.495,
"step": 68744
},
{
"epoch": 4.0,
"eval_loss": 1.3773192167282104,
"eval_runtime": 40.0387,
"eval_samples_per_second": 1716.214,
"eval_steps_per_second": 53.648,
"step": 68748
},
{
"epoch": 5.0,
"learning_rate": 1.8001000756385642e-05,
"loss": 1.4413,
"step": 85930
},
{
"epoch": 5.0,
"eval_loss": 1.3373253345489502,
"eval_runtime": 40.4299,
"eval_samples_per_second": 1699.61,
"eval_steps_per_second": 53.129,
"step": 85935
},
{
"epoch": 6.0,
"learning_rate": 1.7601186943620177e-05,
"loss": 1.3994,
"step": 103116
},
{
"epoch": 6.0,
"eval_loss": 1.3044638633728027,
"eval_runtime": 40.8305,
"eval_samples_per_second": 1682.934,
"eval_steps_per_second": 52.608,
"step": 103122
},
{
"epoch": 7.0,
"learning_rate": 1.7201396404259034e-05,
"loss": 1.3648,
"step": 120302
},
{
"epoch": 7.0,
"eval_loss": 1.2776345014572144,
"eval_runtime": 40.5929,
"eval_samples_per_second": 1692.784,
"eval_steps_per_second": 52.916,
"step": 120309
},
{
"epoch": 8.0,
"learning_rate": 1.680160586489789e-05,
"loss": 1.3356,
"step": 137488
},
{
"epoch": 8.0,
"eval_loss": 1.2525078058242798,
"eval_runtime": 41.1415,
"eval_samples_per_second": 1670.213,
"eval_steps_per_second": 52.21,
"step": 137496
},
{
"epoch": 9.0,
"learning_rate": 1.640179205213243e-05,
"loss": 1.3103,
"step": 154674
},
{
"epoch": 9.0,
"eval_loss": 1.229679822921753,
"eval_runtime": 40.8721,
"eval_samples_per_second": 1681.22,
"eval_steps_per_second": 52.554,
"step": 154683
},
{
"epoch": 10.0,
"learning_rate": 1.6002001512771283e-05,
"loss": 1.287,
"step": 171860
},
{
"epoch": 10.0,
"eval_loss": 1.212567925453186,
"eval_runtime": 39.8522,
"eval_samples_per_second": 1724.248,
"eval_steps_per_second": 53.899,
"step": 171870
},
{
"epoch": 11.0,
"learning_rate": 1.560218770000582e-05,
"loss": 1.2684,
"step": 189046
},
{
"epoch": 11.0,
"eval_loss": 1.194634199142456,
"eval_runtime": 39.8234,
"eval_samples_per_second": 1725.494,
"eval_steps_per_second": 53.938,
"step": 189057
},
{
"epoch": 12.0,
"learning_rate": 1.5202373887240358e-05,
"loss": 1.2503,
"step": 206232
},
{
"epoch": 12.0,
"eval_loss": 1.181823968887329,
"eval_runtime": 40.2477,
"eval_samples_per_second": 1707.301,
"eval_steps_per_second": 53.369,
"step": 206244
},
{
"epoch": 13.0,
"learning_rate": 1.4802606621283528e-05,
"loss": 1.2338,
"step": 223418
},
{
"epoch": 13.0,
"eval_loss": 1.1666418313980103,
"eval_runtime": 40.5887,
"eval_samples_per_second": 1692.959,
"eval_steps_per_second": 52.921,
"step": 223431
},
{
"epoch": 14.0,
"learning_rate": 1.4402816081922383e-05,
"loss": 1.2175,
"step": 240604
},
{
"epoch": 14.0,
"eval_loss": 1.1534117460250854,
"eval_runtime": 40.4201,
"eval_samples_per_second": 1700.022,
"eval_steps_per_second": 53.142,
"step": 240618
},
{
"epoch": 15.0,
"learning_rate": 1.4003048815965557e-05,
"loss": 1.2051,
"step": 257790
},
{
"epoch": 15.0,
"eval_loss": 1.1395701169967651,
"eval_runtime": 40.5078,
"eval_samples_per_second": 1696.341,
"eval_steps_per_second": 53.027,
"step": 257805
},
{
"epoch": 16.0,
"learning_rate": 1.3603258276604412e-05,
"loss": 1.1938,
"step": 274976
},
{
"epoch": 16.0,
"eval_loss": 1.1312499046325684,
"eval_runtime": 40.784,
"eval_samples_per_second": 1684.852,
"eval_steps_per_second": 52.668,
"step": 274992
},
{
"epoch": 17.0,
"learning_rate": 1.3203491010647584e-05,
"loss": 1.182,
"step": 292162
},
{
"epoch": 17.0,
"eval_loss": 1.1207791566848755,
"eval_runtime": 40.735,
"eval_samples_per_second": 1686.879,
"eval_steps_per_second": 52.731,
"step": 292179
},
{
"epoch": 18.0,
"learning_rate": 1.2803700471286437e-05,
"loss": 1.1706,
"step": 309348
},
{
"epoch": 18.0,
"eval_loss": 1.11408269405365,
"eval_runtime": 40.5382,
"eval_samples_per_second": 1695.07,
"eval_steps_per_second": 52.987,
"step": 309366
},
{
"epoch": 19.0,
"learning_rate": 1.2403886658520976e-05,
"loss": 1.1597,
"step": 326534
},
{
"epoch": 19.0,
"eval_loss": 1.1045416593551636,
"eval_runtime": 41.1519,
"eval_samples_per_second": 1669.791,
"eval_steps_per_second": 52.197,
"step": 326553
},
{
"epoch": 20.0,
"learning_rate": 1.2004096119159831e-05,
"loss": 1.1519,
"step": 343720
},
{
"epoch": 20.0,
"eval_loss": 1.0968821048736572,
"eval_runtime": 40.942,
"eval_samples_per_second": 1678.35,
"eval_steps_per_second": 52.464,
"step": 343740
},
{
"epoch": 21.0,
"learning_rate": 1.1604305579798688e-05,
"loss": 1.1436,
"step": 360906
},
{
"epoch": 21.0,
"eval_loss": 1.0863711833953857,
"eval_runtime": 40.8058,
"eval_samples_per_second": 1683.95,
"eval_steps_per_second": 52.64,
"step": 360927
},
{
"epoch": 22.0,
"learning_rate": 1.120451504043754e-05,
"loss": 1.1336,
"step": 378092
},
{
"epoch": 22.0,
"eval_loss": 1.082047939300537,
"eval_runtime": 40.4735,
"eval_samples_per_second": 1697.777,
"eval_steps_per_second": 53.072,
"step": 378114
},
{
"epoch": 23.0,
"learning_rate": 1.0804701227672078e-05,
"loss": 1.1265,
"step": 395278
},
{
"epoch": 23.0,
"eval_loss": 1.0744354724884033,
"eval_runtime": 41.164,
"eval_samples_per_second": 1669.298,
"eval_steps_per_second": 52.182,
"step": 395301
},
{
"epoch": 24.0,
"learning_rate": 1.0404910688310935e-05,
"loss": 1.119,
"step": 412464
},
{
"epoch": 24.0,
"eval_loss": 1.0701904296875,
"eval_runtime": 40.4778,
"eval_samples_per_second": 1697.599,
"eval_steps_per_second": 53.066,
"step": 412488
},
{
"epoch": 25.0,
"learning_rate": 1.000512014894979e-05,
"loss": 1.1117,
"step": 429650
},
{
"epoch": 25.0,
"eval_loss": 1.062601089477539,
"eval_runtime": 40.2464,
"eval_samples_per_second": 1707.357,
"eval_steps_per_second": 53.371,
"step": 429675
},
{
"epoch": 26.0,
"learning_rate": 9.60535288299296e-06,
"loss": 1.1048,
"step": 446836
},
{
"epoch": 26.0,
"eval_loss": 1.0572917461395264,
"eval_runtime": 39.7303,
"eval_samples_per_second": 1729.537,
"eval_steps_per_second": 54.065,
"step": 446862
},
{
"epoch": 27.0,
"learning_rate": 9.205562343631815e-06,
"loss": 1.098,
"step": 464022
},
{
"epoch": 27.0,
"eval_loss": 1.0520741939544678,
"eval_runtime": 39.4819,
"eval_samples_per_second": 1740.42,
"eval_steps_per_second": 54.405,
"step": 464049
},
{
"epoch": 28.0,
"learning_rate": 8.80577180427067e-06,
"loss": 1.0924,
"step": 481208
},
{
"epoch": 28.0,
"eval_loss": 1.0490448474884033,
"eval_runtime": 39.5503,
"eval_samples_per_second": 1737.408,
"eval_steps_per_second": 54.311,
"step": 481236
},
{
"epoch": 29.0,
"learning_rate": 8.405981264909526e-06,
"loss": 1.087,
"step": 498394
},
{
"epoch": 29.0,
"eval_loss": 1.0411999225616455,
"eval_runtime": 39.4139,
"eval_samples_per_second": 1743.421,
"eval_steps_per_second": 54.499,
"step": 498423
},
{
"epoch": 30.0,
"learning_rate": 8.006213998952698e-06,
"loss": 1.0812,
"step": 515580
},
{
"epoch": 30.0,
"eval_loss": 1.0427082777023315,
"eval_runtime": 40.8396,
"eval_samples_per_second": 1682.558,
"eval_steps_per_second": 52.596,
"step": 515610
},
{
"epoch": 31.0,
"learning_rate": 7.606423459591552e-06,
"loss": 1.076,
"step": 532766
},
{
"epoch": 31.0,
"eval_loss": 1.031008243560791,
"eval_runtime": 39.0796,
"eval_samples_per_second": 1758.334,
"eval_steps_per_second": 54.965,
"step": 532797
},
{
"epoch": 32.0,
"learning_rate": 7.206632920230407e-06,
"loss": 1.0707,
"step": 549952
},
{
"epoch": 32.0,
"eval_loss": 1.0325006246566772,
"eval_runtime": 38.4227,
"eval_samples_per_second": 1788.396,
"eval_steps_per_second": 55.904,
"step": 549984
},
{
"epoch": 33.0,
"learning_rate": 6.806842380869262e-06,
"loss": 1.0654,
"step": 567138
},
{
"epoch": 33.0,
"eval_loss": 1.021201252937317,
"eval_runtime": 37.9747,
"eval_samples_per_second": 1809.492,
"eval_steps_per_second": 56.564,
"step": 567171
},
{
"epoch": 34.0,
"learning_rate": 6.407051841508117e-06,
"loss": 1.0609,
"step": 584324
},
{
"epoch": 34.0,
"eval_loss": 1.0170767307281494,
"eval_runtime": 38.0157,
"eval_samples_per_second": 1807.544,
"eval_steps_per_second": 56.503,
"step": 584358
},
{
"epoch": 35.0,
"learning_rate": 6.007238028742655e-06,
"loss": 1.0572,
"step": 601510
},
{
"epoch": 35.0,
"eval_loss": 1.0200223922729492,
"eval_runtime": 38.7209,
"eval_samples_per_second": 1774.625,
"eval_steps_per_second": 55.474,
"step": 601545
},
{
"epoch": 36.0,
"learning_rate": 5.607470762785827e-06,
"loss": 1.0528,
"step": 618696
},
{
"epoch": 36.0,
"eval_loss": 1.0177444219589233,
"eval_runtime": 38.1852,
"eval_samples_per_second": 1799.519,
"eval_steps_per_second": 56.252,
"step": 618732
},
{
"epoch": 37.0,
"learning_rate": 5.207656950020364e-06,
"loss": 1.0485,
"step": 635882
},
{
"epoch": 37.0,
"eval_loss": 1.0121095180511475,
"eval_runtime": 37.9666,
"eval_samples_per_second": 1809.879,
"eval_steps_per_second": 56.576,
"step": 635919
},
{
"epoch": 38.0,
"learning_rate": 4.807889684063537e-06,
"loss": 1.0456,
"step": 653068
},
{
"epoch": 38.0,
"eval_loss": 1.0075100660324097,
"eval_runtime": 37.7745,
"eval_samples_per_second": 1819.084,
"eval_steps_per_second": 56.864,
"step": 653106
},
{
"epoch": 39.0,
"learning_rate": 4.408075871298074e-06,
"loss": 1.0436,
"step": 670254
},
{
"epoch": 39.0,
"eval_loss": 1.0085304975509644,
"eval_runtime": 37.8436,
"eval_samples_per_second": 1815.761,
"eval_steps_per_second": 56.76,
"step": 670293
},
{
"epoch": 40.0,
"learning_rate": 4.008308605341247e-06,
"loss": 1.0403,
"step": 687440
},
{
"epoch": 40.0,
"eval_loss": 1.0014568567276,
"eval_runtime": 37.7063,
"eval_samples_per_second": 1822.377,
"eval_steps_per_second": 56.967,
"step": 687480
},
{
"epoch": 41.0,
"learning_rate": 3.608494792575784e-06,
"loss": 1.0358,
"step": 704626
},
{
"epoch": 41.0,
"eval_loss": 0.9997268915176392,
"eval_runtime": 37.9829,
"eval_samples_per_second": 1809.103,
"eval_steps_per_second": 56.552,
"step": 704667
},
{
"epoch": 42.0,
"learning_rate": 3.208704253214639e-06,
"loss": 1.0339,
"step": 721812
},
{
"epoch": 42.0,
"eval_loss": 1.0009877681732178,
"eval_runtime": 37.7238,
"eval_samples_per_second": 1821.528,
"eval_steps_per_second": 56.94,
"step": 721854
},
{
"epoch": 43.0,
"learning_rate": 2.808890440449177e-06,
"loss": 1.0312,
"step": 738998
},
{
"epoch": 43.0,
"eval_loss": 0.9973294138908386,
"eval_runtime": 37.9123,
"eval_samples_per_second": 1812.472,
"eval_steps_per_second": 56.657,
"step": 739041
},
{
"epoch": 44.0,
"learning_rate": 2.4090999010880316e-06,
"loss": 1.0278,
"step": 756184
},
{
"epoch": 44.0,
"eval_loss": 0.9942373037338257,
"eval_runtime": 37.8136,
"eval_samples_per_second": 1817.205,
"eval_steps_per_second": 56.805,
"step": 756228
},
{
"epoch": 45.0,
"learning_rate": 2.0093093617268868e-06,
"loss": 1.0258,
"step": 773370
},
{
"epoch": 45.0,
"eval_loss": 0.9922175407409668,
"eval_runtime": 37.7488,
"eval_samples_per_second": 1820.324,
"eval_steps_per_second": 56.903,
"step": 773415
},
{
"epoch": 46.0,
"learning_rate": 1.6094955489614245e-06,
"loss": 1.024,
"step": 790556
},
{
"epoch": 46.0,
"eval_loss": 0.9902246594429016,
"eval_runtime": 37.7011,
"eval_samples_per_second": 1822.624,
"eval_steps_per_second": 56.974,
"step": 790602
},
{
"epoch": 47.0,
"learning_rate": 1.2096817361959622e-06,
"loss": 1.0213,
"step": 807742
},
{
"epoch": 47.0,
"eval_loss": 0.9919160008430481,
"eval_runtime": 37.515,
"eval_samples_per_second": 1831.667,
"eval_steps_per_second": 57.257,
"step": 807789
},
{
"epoch": 48.0,
"learning_rate": 8.09891196834817e-07,
"loss": 1.0202,
"step": 824928
},
{
"epoch": 48.0,
"eval_loss": 0.9896969199180603,
"eval_runtime": 37.5208,
"eval_samples_per_second": 1831.385,
"eval_steps_per_second": 57.248,
"step": 824976
},
{
"epoch": 49.0,
"learning_rate": 4.1007738406935476e-07,
"loss": 1.0184,
"step": 842114
},
{
"epoch": 49.0,
"eval_loss": 0.9869447350502014,
"eval_runtime": 37.5715,
"eval_samples_per_second": 1828.912,
"eval_steps_per_second": 57.171,
"step": 842163
},
{
"epoch": 50.0,
"learning_rate": 1.0286844708209695e-08,
"loss": 1.0185,
"step": 859300
},
{
"epoch": 50.0,
"eval_loss": 0.9913281798362732,
"eval_runtime": 38.0547,
"eval_samples_per_second": 1805.692,
"eval_steps_per_second": 56.445,
"step": 859350
}
],
"max_steps": 859350,
"num_train_epochs": 50,
"total_flos": 9.113148657842688e+17,
"trial_name": null,
"trial_params": null
}