en_he_base / trainer_state.json
orendar's picture
update
afdd8c0
raw
history blame
29.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.999983677999577,
"global_step": 612660,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"learning_rate": 9.951082166291254e-05,
"loss": 4.9783,
"step": 3000
},
{
"epoch": 0.2,
"learning_rate": 9.902115365782e-05,
"loss": 3.1296,
"step": 6000
},
{
"epoch": 0.29,
"learning_rate": 9.853148565272745e-05,
"loss": 2.7383,
"step": 9000
},
{
"epoch": 0.39,
"learning_rate": 9.804181764763491e-05,
"loss": 2.5447,
"step": 12000
},
{
"epoch": 0.49,
"learning_rate": 9.755231286521073e-05,
"loss": 2.4175,
"step": 15000
},
{
"epoch": 0.59,
"learning_rate": 9.706280808278654e-05,
"loss": 2.3285,
"step": 18000
},
{
"epoch": 0.69,
"learning_rate": 9.657346652303072e-05,
"loss": 2.2597,
"step": 21000
},
{
"epoch": 0.78,
"learning_rate": 9.608396174060654e-05,
"loss": 2.2072,
"step": 24000
},
{
"epoch": 0.88,
"learning_rate": 9.559462018085072e-05,
"loss": 2.1601,
"step": 27000
},
{
"epoch": 0.98,
"learning_rate": 9.510511539842654e-05,
"loss": 2.125,
"step": 30000
},
{
"epoch": 1.0,
"eval_loss": 2.072047233581543,
"eval_runtime": 19.2095,
"eval_samples_per_second": 1536.842,
"eval_steps_per_second": 9.631,
"step": 30633
},
{
"epoch": 1.08,
"learning_rate": 9.461577383867072e-05,
"loss": 2.0771,
"step": 33000
},
{
"epoch": 1.18,
"learning_rate": 9.412626905624655e-05,
"loss": 2.0514,
"step": 36000
},
{
"epoch": 1.27,
"learning_rate": 9.363692749649071e-05,
"loss": 2.0297,
"step": 39000
},
{
"epoch": 1.37,
"learning_rate": 9.314742271406653e-05,
"loss": 2.0123,
"step": 42000
},
{
"epoch": 1.47,
"learning_rate": 9.265791793164235e-05,
"loss": 1.9955,
"step": 45000
},
{
"epoch": 1.57,
"learning_rate": 9.216857637188653e-05,
"loss": 1.9803,
"step": 48000
},
{
"epoch": 1.66,
"learning_rate": 9.167907158946235e-05,
"loss": 1.9667,
"step": 51000
},
{
"epoch": 1.76,
"learning_rate": 9.118973002970654e-05,
"loss": 1.9505,
"step": 54000
},
{
"epoch": 1.86,
"learning_rate": 9.070022524728234e-05,
"loss": 1.9381,
"step": 57000
},
{
"epoch": 1.96,
"learning_rate": 9.021088368752652e-05,
"loss": 1.9288,
"step": 60000
},
{
"epoch": 2.0,
"eval_loss": 1.9032506942749023,
"eval_runtime": 19.1781,
"eval_samples_per_second": 1539.357,
"eval_steps_per_second": 9.646,
"step": 61266
},
{
"epoch": 2.06,
"learning_rate": 8.972137890510235e-05,
"loss": 1.9011,
"step": 63000
},
{
"epoch": 2.15,
"learning_rate": 8.923203734534653e-05,
"loss": 1.8834,
"step": 66000
},
{
"epoch": 2.25,
"learning_rate": 8.874253256292235e-05,
"loss": 1.8807,
"step": 69000
},
{
"epoch": 2.35,
"learning_rate": 8.825319100316653e-05,
"loss": 1.8737,
"step": 72000
},
{
"epoch": 2.45,
"learning_rate": 8.776368622074233e-05,
"loss": 1.8686,
"step": 75000
},
{
"epoch": 2.55,
"learning_rate": 8.727434466098652e-05,
"loss": 1.861,
"step": 78000
},
{
"epoch": 2.64,
"learning_rate": 8.678483987856234e-05,
"loss": 1.8549,
"step": 81000
},
{
"epoch": 2.74,
"learning_rate": 8.629549831880652e-05,
"loss": 1.8503,
"step": 84000
},
{
"epoch": 2.84,
"learning_rate": 8.580599353638234e-05,
"loss": 1.8446,
"step": 87000
},
{
"epoch": 2.94,
"learning_rate": 8.531665197662652e-05,
"loss": 1.8387,
"step": 90000
},
{
"epoch": 3.0,
"eval_loss": 1.833019733428955,
"eval_runtime": 19.0959,
"eval_samples_per_second": 1545.988,
"eval_steps_per_second": 9.688,
"step": 91899
},
{
"epoch": 3.04,
"learning_rate": 8.482714719420234e-05,
"loss": 1.8222,
"step": 93000
},
{
"epoch": 3.13,
"learning_rate": 8.433764241177815e-05,
"loss": 1.8037,
"step": 96000
},
{
"epoch": 3.23,
"learning_rate": 8.384830085202233e-05,
"loss": 1.8017,
"step": 99000
},
{
"epoch": 3.33,
"learning_rate": 8.335879606959815e-05,
"loss": 1.8018,
"step": 102000
},
{
"epoch": 3.43,
"learning_rate": 8.286945450984233e-05,
"loss": 1.7984,
"step": 105000
},
{
"epoch": 3.53,
"learning_rate": 8.237994972741815e-05,
"loss": 1.796,
"step": 108000
},
{
"epoch": 3.62,
"learning_rate": 8.189060816766233e-05,
"loss": 1.7942,
"step": 111000
},
{
"epoch": 3.72,
"learning_rate": 8.140110338523814e-05,
"loss": 1.7905,
"step": 114000
},
{
"epoch": 3.82,
"learning_rate": 8.091176182548232e-05,
"loss": 1.7885,
"step": 117000
},
{
"epoch": 3.92,
"learning_rate": 8.042225704305814e-05,
"loss": 1.7832,
"step": 120000
},
{
"epoch": 4.0,
"eval_loss": 1.7864413261413574,
"eval_runtime": 19.3546,
"eval_samples_per_second": 1525.321,
"eval_steps_per_second": 9.558,
"step": 122532
},
{
"epoch": 4.02,
"learning_rate": 7.993291548330233e-05,
"loss": 1.7754,
"step": 123000
},
{
"epoch": 4.11,
"learning_rate": 7.944341070087814e-05,
"loss": 1.7507,
"step": 126000
},
{
"epoch": 4.21,
"learning_rate": 7.895406914112233e-05,
"loss": 1.7526,
"step": 129000
},
{
"epoch": 4.31,
"learning_rate": 7.846456435869815e-05,
"loss": 1.7541,
"step": 132000
},
{
"epoch": 4.41,
"learning_rate": 7.797522279894231e-05,
"loss": 1.7537,
"step": 135000
},
{
"epoch": 4.5,
"learning_rate": 7.748571801651814e-05,
"loss": 1.7541,
"step": 138000
},
{
"epoch": 4.6,
"learning_rate": 7.699621323409395e-05,
"loss": 1.7505,
"step": 141000
},
{
"epoch": 4.7,
"learning_rate": 7.650687167433813e-05,
"loss": 1.7475,
"step": 144000
},
{
"epoch": 4.8,
"learning_rate": 7.601736689191396e-05,
"loss": 1.7477,
"step": 147000
},
{
"epoch": 4.9,
"learning_rate": 7.552802533215814e-05,
"loss": 1.7461,
"step": 150000
},
{
"epoch": 4.99,
"learning_rate": 7.503852054973394e-05,
"loss": 1.7445,
"step": 153000
},
{
"epoch": 5.0,
"eval_loss": 1.7591967582702637,
"eval_runtime": 19.1359,
"eval_samples_per_second": 1542.756,
"eval_steps_per_second": 9.668,
"step": 153165
},
{
"epoch": 5.09,
"learning_rate": 7.454917898997813e-05,
"loss": 1.7143,
"step": 156000
},
{
"epoch": 5.19,
"learning_rate": 7.405967420755395e-05,
"loss": 1.7177,
"step": 159000
},
{
"epoch": 5.29,
"learning_rate": 7.357033264779813e-05,
"loss": 1.7188,
"step": 162000
},
{
"epoch": 5.39,
"learning_rate": 7.308082786537395e-05,
"loss": 1.7198,
"step": 165000
},
{
"epoch": 5.48,
"learning_rate": 7.259148630561813e-05,
"loss": 1.7202,
"step": 168000
},
{
"epoch": 5.58,
"learning_rate": 7.210198152319395e-05,
"loss": 1.7184,
"step": 171000
},
{
"epoch": 5.68,
"learning_rate": 7.161247674076976e-05,
"loss": 1.719,
"step": 174000
},
{
"epoch": 5.78,
"learning_rate": 7.112313518101394e-05,
"loss": 1.7173,
"step": 177000
},
{
"epoch": 5.88,
"learning_rate": 7.063363039858976e-05,
"loss": 1.7176,
"step": 180000
},
{
"epoch": 5.97,
"learning_rate": 7.014428883883394e-05,
"loss": 1.7152,
"step": 183000
},
{
"epoch": 6.0,
"eval_loss": 1.740378975868225,
"eval_runtime": 19.1537,
"eval_samples_per_second": 1541.325,
"eval_steps_per_second": 9.659,
"step": 183798
},
{
"epoch": 6.07,
"learning_rate": 6.965478405640976e-05,
"loss": 1.6926,
"step": 186000
},
{
"epoch": 6.17,
"learning_rate": 6.916544249665395e-05,
"loss": 1.6889,
"step": 189000
},
{
"epoch": 6.27,
"learning_rate": 6.867593771422975e-05,
"loss": 1.6923,
"step": 192000
},
{
"epoch": 6.37,
"learning_rate": 6.818659615447393e-05,
"loss": 1.693,
"step": 195000
},
{
"epoch": 6.46,
"learning_rate": 6.769709137204976e-05,
"loss": 1.694,
"step": 198000
},
{
"epoch": 6.56,
"learning_rate": 6.720774981229393e-05,
"loss": 1.6948,
"step": 201000
},
{
"epoch": 6.66,
"learning_rate": 6.671824502986975e-05,
"loss": 1.6944,
"step": 204000
},
{
"epoch": 6.76,
"learning_rate": 6.622874024744557e-05,
"loss": 1.6934,
"step": 207000
},
{
"epoch": 6.86,
"learning_rate": 6.573939868768974e-05,
"loss": 1.6926,
"step": 210000
},
{
"epoch": 6.95,
"learning_rate": 6.524989390526556e-05,
"loss": 1.6933,
"step": 213000
},
{
"epoch": 7.0,
"eval_loss": 1.7208322286605835,
"eval_runtime": 19.2921,
"eval_samples_per_second": 1530.262,
"eval_steps_per_second": 9.589,
"step": 214431
},
{
"epoch": 7.05,
"learning_rate": 6.476055234550975e-05,
"loss": 1.6773,
"step": 216000
},
{
"epoch": 7.15,
"learning_rate": 6.427104756308556e-05,
"loss": 1.6671,
"step": 219000
},
{
"epoch": 7.25,
"learning_rate": 6.378170600332975e-05,
"loss": 1.6695,
"step": 222000
},
{
"epoch": 7.35,
"learning_rate": 6.329220122090557e-05,
"loss": 1.6707,
"step": 225000
},
{
"epoch": 7.44,
"learning_rate": 6.280285966114975e-05,
"loss": 1.674,
"step": 228000
},
{
"epoch": 7.54,
"learning_rate": 6.231335487872557e-05,
"loss": 1.6726,
"step": 231000
},
{
"epoch": 7.64,
"learning_rate": 6.182401331896974e-05,
"loss": 1.6739,
"step": 234000
},
{
"epoch": 7.74,
"learning_rate": 6.133450853654555e-05,
"loss": 1.6755,
"step": 237000
},
{
"epoch": 7.83,
"learning_rate": 6.084516697678973e-05,
"loss": 1.6726,
"step": 240000
},
{
"epoch": 7.93,
"learning_rate": 6.035566219436556e-05,
"loss": 1.6743,
"step": 243000
},
{
"epoch": 8.0,
"eval_loss": 1.7004761695861816,
"eval_runtime": 19.351,
"eval_samples_per_second": 1525.608,
"eval_steps_per_second": 9.56,
"step": 245064
},
{
"epoch": 8.03,
"learning_rate": 5.986632063460974e-05,
"loss": 1.6642,
"step": 246000
},
{
"epoch": 8.13,
"learning_rate": 5.9376815852185555e-05,
"loss": 1.6475,
"step": 249000
},
{
"epoch": 8.23,
"learning_rate": 5.888747429242973e-05,
"loss": 1.6525,
"step": 252000
},
{
"epoch": 8.32,
"learning_rate": 5.839796951000556e-05,
"loss": 1.653,
"step": 255000
},
{
"epoch": 8.42,
"learning_rate": 5.7908627950249736e-05,
"loss": 1.6556,
"step": 258000
},
{
"epoch": 8.52,
"learning_rate": 5.741912316782555e-05,
"loss": 1.6556,
"step": 261000
},
{
"epoch": 8.62,
"learning_rate": 5.692961838540136e-05,
"loss": 1.6565,
"step": 264000
},
{
"epoch": 8.72,
"learning_rate": 5.6440276825645545e-05,
"loss": 1.6567,
"step": 267000
},
{
"epoch": 8.81,
"learning_rate": 5.5950772043221364e-05,
"loss": 1.6574,
"step": 270000
},
{
"epoch": 8.91,
"learning_rate": 5.546143048346555e-05,
"loss": 1.6561,
"step": 273000
},
{
"epoch": 9.0,
"eval_loss": 1.6906808614730835,
"eval_runtime": 19.2999,
"eval_samples_per_second": 1529.642,
"eval_steps_per_second": 9.586,
"step": 275697
},
{
"epoch": 9.01,
"learning_rate": 5.497192570104136e-05,
"loss": 1.6546,
"step": 276000
},
{
"epoch": 9.11,
"learning_rate": 5.4482584141285545e-05,
"loss": 1.6302,
"step": 279000
},
{
"epoch": 9.21,
"learning_rate": 5.3993079358861364e-05,
"loss": 1.6356,
"step": 282000
},
{
"epoch": 9.3,
"learning_rate": 5.3503574576437175e-05,
"loss": 1.6375,
"step": 285000
},
{
"epoch": 9.4,
"learning_rate": 5.301423301668136e-05,
"loss": 1.6399,
"step": 288000
},
{
"epoch": 9.5,
"learning_rate": 5.252472823425718e-05,
"loss": 1.6404,
"step": 291000
},
{
"epoch": 9.6,
"learning_rate": 5.203538667450135e-05,
"loss": 1.642,
"step": 294000
},
{
"epoch": 9.7,
"learning_rate": 5.1545881892077175e-05,
"loss": 1.642,
"step": 297000
},
{
"epoch": 9.79,
"learning_rate": 5.105654033232135e-05,
"loss": 1.6421,
"step": 300000
},
{
"epoch": 9.89,
"learning_rate": 5.0567035549897165e-05,
"loss": 1.6417,
"step": 303000
},
{
"epoch": 9.99,
"learning_rate": 5.007753076747299e-05,
"loss": 1.6431,
"step": 306000
},
{
"epoch": 10.0,
"eval_loss": 1.690254807472229,
"eval_runtime": 19.1786,
"eval_samples_per_second": 1539.322,
"eval_steps_per_second": 9.646,
"step": 306330
},
{
"epoch": 10.09,
"learning_rate": 4.9588189207717175e-05,
"loss": 1.6191,
"step": 309000
},
{
"epoch": 10.19,
"learning_rate": 4.909868442529299e-05,
"loss": 1.6215,
"step": 312000
},
{
"epoch": 10.28,
"learning_rate": 4.8609342865537165e-05,
"loss": 1.6247,
"step": 315000
},
{
"epoch": 10.38,
"learning_rate": 4.8119838083112984e-05,
"loss": 1.6244,
"step": 318000
},
{
"epoch": 10.48,
"learning_rate": 4.76303333006888e-05,
"loss": 1.6261,
"step": 321000
},
{
"epoch": 10.58,
"learning_rate": 4.714099174093298e-05,
"loss": 1.6288,
"step": 324000
},
{
"epoch": 10.67,
"learning_rate": 4.66514869585088e-05,
"loss": 1.6289,
"step": 327000
},
{
"epoch": 10.77,
"learning_rate": 4.6162145398752984e-05,
"loss": 1.6295,
"step": 330000
},
{
"epoch": 10.87,
"learning_rate": 4.5672640616328796e-05,
"loss": 1.6295,
"step": 333000
},
{
"epoch": 10.97,
"learning_rate": 4.5183135833904614e-05,
"loss": 1.6282,
"step": 336000
},
{
"epoch": 11.0,
"eval_loss": 1.6800603866577148,
"eval_runtime": 19.1041,
"eval_samples_per_second": 1545.321,
"eval_steps_per_second": 9.684,
"step": 336963
},
{
"epoch": 11.07,
"learning_rate": 4.46937942741488e-05,
"loss": 1.6134,
"step": 339000
},
{
"epoch": 11.16,
"learning_rate": 4.420428949172462e-05,
"loss": 1.6072,
"step": 342000
},
{
"epoch": 11.26,
"learning_rate": 4.371494793196879e-05,
"loss": 1.6099,
"step": 345000
},
{
"epoch": 11.36,
"learning_rate": 4.322544314954461e-05,
"loss": 1.6137,
"step": 348000
},
{
"epoch": 11.46,
"learning_rate": 4.273610158978879e-05,
"loss": 1.6136,
"step": 351000
},
{
"epoch": 11.56,
"learning_rate": 4.224659680736461e-05,
"loss": 1.6151,
"step": 354000
},
{
"epoch": 11.65,
"learning_rate": 4.175725524760879e-05,
"loss": 1.6166,
"step": 357000
},
{
"epoch": 11.75,
"learning_rate": 4.126775046518461e-05,
"loss": 1.6179,
"step": 360000
},
{
"epoch": 11.85,
"learning_rate": 4.077840890542879e-05,
"loss": 1.6174,
"step": 363000
},
{
"epoch": 11.95,
"learning_rate": 4.0288904123004604e-05,
"loss": 1.6173,
"step": 366000
},
{
"epoch": 12.0,
"eval_loss": 1.6714136600494385,
"eval_runtime": 19.2107,
"eval_samples_per_second": 1536.747,
"eval_steps_per_second": 9.63,
"step": 367596
},
{
"epoch": 12.05,
"learning_rate": 3.979939934058042e-05,
"loss": 1.6063,
"step": 369000
},
{
"epoch": 12.14,
"learning_rate": 3.931005778082461e-05,
"loss": 1.5969,
"step": 372000
},
{
"epoch": 12.24,
"learning_rate": 3.882055299840042e-05,
"loss": 1.5998,
"step": 375000
},
{
"epoch": 12.34,
"learning_rate": 3.83312114386446e-05,
"loss": 1.6011,
"step": 378000
},
{
"epoch": 12.44,
"learning_rate": 3.784154343355205e-05,
"loss": 1.6034,
"step": 381000
},
{
"epoch": 12.54,
"learning_rate": 3.7352201873796235e-05,
"loss": 1.6035,
"step": 384000
},
{
"epoch": 12.63,
"learning_rate": 3.686269709137205e-05,
"loss": 1.6054,
"step": 387000
},
{
"epoch": 12.73,
"learning_rate": 3.637335553161623e-05,
"loss": 1.6055,
"step": 390000
},
{
"epoch": 12.83,
"learning_rate": 3.588385074919205e-05,
"loss": 1.6057,
"step": 393000
},
{
"epoch": 12.93,
"learning_rate": 3.539434596676787e-05,
"loss": 1.6061,
"step": 396000
},
{
"epoch": 13.0,
"eval_loss": 1.6634231805801392,
"eval_runtime": 19.238,
"eval_samples_per_second": 1534.564,
"eval_steps_per_second": 9.616,
"step": 398229
},
{
"epoch": 13.03,
"learning_rate": 3.4905004407012046e-05,
"loss": 1.5995,
"step": 399000
},
{
"epoch": 13.12,
"learning_rate": 3.4415499624587865e-05,
"loss": 1.5849,
"step": 402000
},
{
"epoch": 13.22,
"learning_rate": 3.392615806483205e-05,
"loss": 1.5894,
"step": 405000
},
{
"epoch": 13.32,
"learning_rate": 3.343665328240786e-05,
"loss": 1.5914,
"step": 408000
},
{
"epoch": 13.42,
"learning_rate": 3.294731172265204e-05,
"loss": 1.5917,
"step": 411000
},
{
"epoch": 13.51,
"learning_rate": 3.245780694022786e-05,
"loss": 1.5933,
"step": 414000
},
{
"epoch": 13.61,
"learning_rate": 3.196830215780368e-05,
"loss": 1.5926,
"step": 417000
},
{
"epoch": 13.71,
"learning_rate": 3.1478960598047855e-05,
"loss": 1.5956,
"step": 420000
},
{
"epoch": 13.81,
"learning_rate": 3.0989455815623674e-05,
"loss": 1.5953,
"step": 423000
},
{
"epoch": 13.91,
"learning_rate": 3.0500114255867855e-05,
"loss": 1.5971,
"step": 426000
},
{
"epoch": 14.0,
"eval_loss": 1.6542909145355225,
"eval_runtime": 19.1669,
"eval_samples_per_second": 1540.259,
"eval_steps_per_second": 9.652,
"step": 428862
},
{
"epoch": 14.0,
"learning_rate": 3.0010609473443674e-05,
"loss": 1.5959,
"step": 429000
},
{
"epoch": 14.1,
"learning_rate": 2.9521267913687855e-05,
"loss": 1.5752,
"step": 432000
},
{
"epoch": 14.2,
"learning_rate": 2.9031763131263674e-05,
"loss": 1.5775,
"step": 435000
},
{
"epoch": 14.3,
"learning_rate": 2.854225834883949e-05,
"loss": 1.5814,
"step": 438000
},
{
"epoch": 14.4,
"learning_rate": 2.8052916789083673e-05,
"loss": 1.5814,
"step": 441000
},
{
"epoch": 14.49,
"learning_rate": 2.7563412006659482e-05,
"loss": 1.5837,
"step": 444000
},
{
"epoch": 14.59,
"learning_rate": 2.7074070446903667e-05,
"loss": 1.585,
"step": 447000
},
{
"epoch": 14.69,
"learning_rate": 2.6584565664479482e-05,
"loss": 1.5868,
"step": 450000
},
{
"epoch": 14.79,
"learning_rate": 2.60950608820553e-05,
"loss": 1.5854,
"step": 453000
},
{
"epoch": 14.89,
"learning_rate": 2.5605719322299482e-05,
"loss": 1.5864,
"step": 456000
},
{
"epoch": 14.98,
"learning_rate": 2.51162145398753e-05,
"loss": 1.5867,
"step": 459000
},
{
"epoch": 15.0,
"eval_loss": 1.6488285064697266,
"eval_runtime": 19.1177,
"eval_samples_per_second": 1544.224,
"eval_steps_per_second": 9.677,
"step": 459495
},
{
"epoch": 15.08,
"learning_rate": 2.4626709757451116e-05,
"loss": 1.5695,
"step": 462000
},
{
"epoch": 15.18,
"learning_rate": 2.4137368197695297e-05,
"loss": 1.5714,
"step": 465000
},
{
"epoch": 15.28,
"learning_rate": 2.3647863415271113e-05,
"loss": 1.5721,
"step": 468000
},
{
"epoch": 15.38,
"learning_rate": 2.315835863284693e-05,
"loss": 1.5729,
"step": 471000
},
{
"epoch": 15.47,
"learning_rate": 2.2669017073091113e-05,
"loss": 1.5728,
"step": 474000
},
{
"epoch": 15.57,
"learning_rate": 2.2179512290666928e-05,
"loss": 1.5739,
"step": 477000
},
{
"epoch": 15.67,
"learning_rate": 2.1690007508242746e-05,
"loss": 1.5756,
"step": 480000
},
{
"epoch": 15.77,
"learning_rate": 2.1200665948486928e-05,
"loss": 1.5774,
"step": 483000
},
{
"epoch": 15.87,
"learning_rate": 2.0711161166062743e-05,
"loss": 1.5772,
"step": 486000
},
{
"epoch": 15.96,
"learning_rate": 2.022165638363856e-05,
"loss": 1.5781,
"step": 489000
},
{
"epoch": 16.0,
"eval_loss": 1.6446890830993652,
"eval_runtime": 19.0262,
"eval_samples_per_second": 1551.652,
"eval_steps_per_second": 9.723,
"step": 490128
},
{
"epoch": 16.06,
"learning_rate": 1.9732314823882743e-05,
"loss": 1.5652,
"step": 492000
},
{
"epoch": 16.16,
"learning_rate": 1.924281004145856e-05,
"loss": 1.5612,
"step": 495000
},
{
"epoch": 16.26,
"learning_rate": 1.8753305259034377e-05,
"loss": 1.5634,
"step": 498000
},
{
"epoch": 16.35,
"learning_rate": 1.8263800476610192e-05,
"loss": 1.5648,
"step": 501000
},
{
"epoch": 16.45,
"learning_rate": 1.7774458916854374e-05,
"loss": 1.5664,
"step": 504000
},
{
"epoch": 16.55,
"learning_rate": 1.728495413443019e-05,
"loss": 1.5656,
"step": 507000
},
{
"epoch": 16.65,
"learning_rate": 1.679561257467437e-05,
"loss": 1.5676,
"step": 510000
},
{
"epoch": 16.75,
"learning_rate": 1.630610779225019e-05,
"loss": 1.566,
"step": 513000
},
{
"epoch": 16.84,
"learning_rate": 1.5816603009826008e-05,
"loss": 1.5691,
"step": 516000
},
{
"epoch": 16.94,
"learning_rate": 1.5327098227401823e-05,
"loss": 1.5684,
"step": 519000
},
{
"epoch": 17.0,
"eval_loss": 1.6387931108474731,
"eval_runtime": 19.1639,
"eval_samples_per_second": 1540.501,
"eval_steps_per_second": 9.654,
"step": 520761
},
{
"epoch": 17.04,
"learning_rate": 1.4837756667646002e-05,
"loss": 1.5616,
"step": 522000
},
{
"epoch": 17.14,
"learning_rate": 1.434825188522182e-05,
"loss": 1.5545,
"step": 525000
},
{
"epoch": 17.24,
"learning_rate": 1.3858747102797636e-05,
"loss": 1.5551,
"step": 528000
},
{
"epoch": 17.33,
"learning_rate": 1.3369405543041818e-05,
"loss": 1.5558,
"step": 531000
},
{
"epoch": 17.43,
"learning_rate": 1.2879900760617636e-05,
"loss": 1.5587,
"step": 534000
},
{
"epoch": 17.53,
"learning_rate": 1.2390559200861816e-05,
"loss": 1.5585,
"step": 537000
},
{
"epoch": 17.63,
"learning_rate": 1.1901054418437633e-05,
"loss": 1.5579,
"step": 540000
},
{
"epoch": 17.73,
"learning_rate": 1.141154963601345e-05,
"loss": 1.5586,
"step": 543000
},
{
"epoch": 17.82,
"learning_rate": 1.0922208076257631e-05,
"loss": 1.559,
"step": 546000
},
{
"epoch": 17.92,
"learning_rate": 1.0432703293833448e-05,
"loss": 1.5597,
"step": 549000
},
{
"epoch": 18.0,
"eval_loss": 1.6415975093841553,
"eval_runtime": 19.1825,
"eval_samples_per_second": 1539.008,
"eval_steps_per_second": 9.644,
"step": 551394
},
{
"epoch": 18.02,
"learning_rate": 9.94336173407763e-06,
"loss": 1.5579,
"step": 552000
},
{
"epoch": 18.12,
"learning_rate": 9.453856951653447e-06,
"loss": 1.5465,
"step": 555000
},
{
"epoch": 18.22,
"learning_rate": 8.964352169229264e-06,
"loss": 1.5491,
"step": 558000
},
{
"epoch": 18.31,
"learning_rate": 8.475010609473443e-06,
"loss": 1.5495,
"step": 561000
},
{
"epoch": 18.41,
"learning_rate": 7.985505827049262e-06,
"loss": 1.5498,
"step": 564000
},
{
"epoch": 18.51,
"learning_rate": 7.496001044625078e-06,
"loss": 1.5514,
"step": 567000
},
{
"epoch": 18.61,
"learning_rate": 7.006659484869258e-06,
"loss": 1.5508,
"step": 570000
},
{
"epoch": 18.71,
"learning_rate": 6.517154702445076e-06,
"loss": 1.5516,
"step": 573000
},
{
"epoch": 18.8,
"learning_rate": 6.027649920020892e-06,
"loss": 1.5508,
"step": 576000
},
{
"epoch": 18.9,
"learning_rate": 5.538308360265074e-06,
"loss": 1.5515,
"step": 579000
},
{
"epoch": 19.0,
"learning_rate": 5.048803577840891e-06,
"loss": 1.5521,
"step": 582000
},
{
"epoch": 19.0,
"eval_loss": 1.6370400190353394,
"eval_runtime": 19.1163,
"eval_samples_per_second": 1544.337,
"eval_steps_per_second": 9.678,
"step": 582027
},
{
"epoch": 19.1,
"learning_rate": 4.559298795416708e-06,
"loss": 1.5413,
"step": 585000
},
{
"epoch": 19.19,
"learning_rate": 4.069957235660889e-06,
"loss": 1.5435,
"step": 588000
},
{
"epoch": 19.29,
"learning_rate": 3.580452453236706e-06,
"loss": 1.5432,
"step": 591000
},
{
"epoch": 19.39,
"learning_rate": 3.091110893480887e-06,
"loss": 1.5437,
"step": 594000
},
{
"epoch": 19.49,
"learning_rate": 2.6016061110567034e-06,
"loss": 1.5431,
"step": 597000
},
{
"epoch": 19.59,
"learning_rate": 2.1121013286325204e-06,
"loss": 1.5431,
"step": 600000
},
{
"epoch": 19.68,
"learning_rate": 1.6225965462083374e-06,
"loss": 1.5441,
"step": 603000
},
{
"epoch": 19.78,
"learning_rate": 1.1332549864525185e-06,
"loss": 1.544,
"step": 606000
},
{
"epoch": 19.88,
"learning_rate": 6.437502040283355e-07,
"loss": 1.5469,
"step": 609000
},
{
"epoch": 19.98,
"learning_rate": 1.5440864427251657e-07,
"loss": 1.5438,
"step": 612000
},
{
"epoch": 20.0,
"eval_loss": 1.636548638343811,
"eval_runtime": 19.1335,
"eval_samples_per_second": 1542.949,
"eval_steps_per_second": 9.669,
"step": 612660
},
{
"epoch": 20.0,
"step": 612660,
"total_flos": 3.3229272051886326e+18,
"train_loss": 1.7127611959194204,
"train_runtime": 370998.644,
"train_samples_per_second": 528.445,
"train_steps_per_second": 1.651
}
],
"max_steps": 612660,
"num_train_epochs": 20,
"total_flos": 3.3229272051886326e+18,
"trial_name": null,
"trial_params": null
}