|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.999983677999577, |
|
"global_step": 612660, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.951082166291254e-05, |
|
"loss": 4.9783, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.902115365782e-05, |
|
"loss": 3.1296, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.853148565272745e-05, |
|
"loss": 2.7383, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.804181764763491e-05, |
|
"loss": 2.5447, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.755231286521073e-05, |
|
"loss": 2.4175, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.706280808278654e-05, |
|
"loss": 2.3285, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.657346652303072e-05, |
|
"loss": 2.2597, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.608396174060654e-05, |
|
"loss": 2.2072, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 9.559462018085072e-05, |
|
"loss": 2.1601, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.510511539842654e-05, |
|
"loss": 2.125, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.072047233581543, |
|
"eval_runtime": 19.2095, |
|
"eval_samples_per_second": 1536.842, |
|
"eval_steps_per_second": 9.631, |
|
"step": 30633 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.461577383867072e-05, |
|
"loss": 2.0771, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 9.412626905624655e-05, |
|
"loss": 2.0514, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 9.363692749649071e-05, |
|
"loss": 2.0297, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 9.314742271406653e-05, |
|
"loss": 2.0123, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 9.265791793164235e-05, |
|
"loss": 1.9955, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.216857637188653e-05, |
|
"loss": 1.9803, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.167907158946235e-05, |
|
"loss": 1.9667, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 9.118973002970654e-05, |
|
"loss": 1.9505, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 9.070022524728234e-05, |
|
"loss": 1.9381, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 9.021088368752652e-05, |
|
"loss": 1.9288, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.9032506942749023, |
|
"eval_runtime": 19.1781, |
|
"eval_samples_per_second": 1539.357, |
|
"eval_steps_per_second": 9.646, |
|
"step": 61266 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 8.972137890510235e-05, |
|
"loss": 1.9011, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.923203734534653e-05, |
|
"loss": 1.8834, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 8.874253256292235e-05, |
|
"loss": 1.8807, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 8.825319100316653e-05, |
|
"loss": 1.8737, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 8.776368622074233e-05, |
|
"loss": 1.8686, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.727434466098652e-05, |
|
"loss": 1.861, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 8.678483987856234e-05, |
|
"loss": 1.8549, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.629549831880652e-05, |
|
"loss": 1.8503, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 8.580599353638234e-05, |
|
"loss": 1.8446, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 8.531665197662652e-05, |
|
"loss": 1.8387, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.833019733428955, |
|
"eval_runtime": 19.0959, |
|
"eval_samples_per_second": 1545.988, |
|
"eval_steps_per_second": 9.688, |
|
"step": 91899 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 8.482714719420234e-05, |
|
"loss": 1.8222, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 8.433764241177815e-05, |
|
"loss": 1.8037, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 8.384830085202233e-05, |
|
"loss": 1.8017, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 8.335879606959815e-05, |
|
"loss": 1.8018, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 8.286945450984233e-05, |
|
"loss": 1.7984, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 8.237994972741815e-05, |
|
"loss": 1.796, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 8.189060816766233e-05, |
|
"loss": 1.7942, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 8.140110338523814e-05, |
|
"loss": 1.7905, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 8.091176182548232e-05, |
|
"loss": 1.7885, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 8.042225704305814e-05, |
|
"loss": 1.7832, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.7864413261413574, |
|
"eval_runtime": 19.3546, |
|
"eval_samples_per_second": 1525.321, |
|
"eval_steps_per_second": 9.558, |
|
"step": 122532 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 7.993291548330233e-05, |
|
"loss": 1.7754, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 7.944341070087814e-05, |
|
"loss": 1.7507, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 7.895406914112233e-05, |
|
"loss": 1.7526, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 7.846456435869815e-05, |
|
"loss": 1.7541, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 7.797522279894231e-05, |
|
"loss": 1.7537, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 7.748571801651814e-05, |
|
"loss": 1.7541, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 7.699621323409395e-05, |
|
"loss": 1.7505, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 7.650687167433813e-05, |
|
"loss": 1.7475, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 7.601736689191396e-05, |
|
"loss": 1.7477, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 7.552802533215814e-05, |
|
"loss": 1.7461, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 7.503852054973394e-05, |
|
"loss": 1.7445, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.7591967582702637, |
|
"eval_runtime": 19.1359, |
|
"eval_samples_per_second": 1542.756, |
|
"eval_steps_per_second": 9.668, |
|
"step": 153165 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 7.454917898997813e-05, |
|
"loss": 1.7143, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 7.405967420755395e-05, |
|
"loss": 1.7177, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 7.357033264779813e-05, |
|
"loss": 1.7188, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 7.308082786537395e-05, |
|
"loss": 1.7198, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 7.259148630561813e-05, |
|
"loss": 1.7202, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 7.210198152319395e-05, |
|
"loss": 1.7184, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 7.161247674076976e-05, |
|
"loss": 1.719, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 7.112313518101394e-05, |
|
"loss": 1.7173, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 7.063363039858976e-05, |
|
"loss": 1.7176, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 7.014428883883394e-05, |
|
"loss": 1.7152, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.740378975868225, |
|
"eval_runtime": 19.1537, |
|
"eval_samples_per_second": 1541.325, |
|
"eval_steps_per_second": 9.659, |
|
"step": 183798 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 6.965478405640976e-05, |
|
"loss": 1.6926, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 6.916544249665395e-05, |
|
"loss": 1.6889, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 6.867593771422975e-05, |
|
"loss": 1.6923, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 6.818659615447393e-05, |
|
"loss": 1.693, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 6.769709137204976e-05, |
|
"loss": 1.694, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 6.720774981229393e-05, |
|
"loss": 1.6948, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 6.671824502986975e-05, |
|
"loss": 1.6944, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 6.622874024744557e-05, |
|
"loss": 1.6934, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 6.573939868768974e-05, |
|
"loss": 1.6926, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 6.524989390526556e-05, |
|
"loss": 1.6933, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.7208322286605835, |
|
"eval_runtime": 19.2921, |
|
"eval_samples_per_second": 1530.262, |
|
"eval_steps_per_second": 9.589, |
|
"step": 214431 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 6.476055234550975e-05, |
|
"loss": 1.6773, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 6.427104756308556e-05, |
|
"loss": 1.6671, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 6.378170600332975e-05, |
|
"loss": 1.6695, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 6.329220122090557e-05, |
|
"loss": 1.6707, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 6.280285966114975e-05, |
|
"loss": 1.674, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 6.231335487872557e-05, |
|
"loss": 1.6726, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 6.182401331896974e-05, |
|
"loss": 1.6739, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 6.133450853654555e-05, |
|
"loss": 1.6755, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 6.084516697678973e-05, |
|
"loss": 1.6726, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 6.035566219436556e-05, |
|
"loss": 1.6743, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.7004761695861816, |
|
"eval_runtime": 19.351, |
|
"eval_samples_per_second": 1525.608, |
|
"eval_steps_per_second": 9.56, |
|
"step": 245064 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 5.986632063460974e-05, |
|
"loss": 1.6642, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 5.9376815852185555e-05, |
|
"loss": 1.6475, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 5.888747429242973e-05, |
|
"loss": 1.6525, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 5.839796951000556e-05, |
|
"loss": 1.653, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 5.7908627950249736e-05, |
|
"loss": 1.6556, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 5.741912316782555e-05, |
|
"loss": 1.6556, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 5.692961838540136e-05, |
|
"loss": 1.6565, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 5.6440276825645545e-05, |
|
"loss": 1.6567, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 5.5950772043221364e-05, |
|
"loss": 1.6574, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 5.546143048346555e-05, |
|
"loss": 1.6561, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.6906808614730835, |
|
"eval_runtime": 19.2999, |
|
"eval_samples_per_second": 1529.642, |
|
"eval_steps_per_second": 9.586, |
|
"step": 275697 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 5.497192570104136e-05, |
|
"loss": 1.6546, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 5.4482584141285545e-05, |
|
"loss": 1.6302, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 5.3993079358861364e-05, |
|
"loss": 1.6356, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 5.3503574576437175e-05, |
|
"loss": 1.6375, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 5.301423301668136e-05, |
|
"loss": 1.6399, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 5.252472823425718e-05, |
|
"loss": 1.6404, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 5.203538667450135e-05, |
|
"loss": 1.642, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 5.1545881892077175e-05, |
|
"loss": 1.642, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 5.105654033232135e-05, |
|
"loss": 1.6421, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 5.0567035549897165e-05, |
|
"loss": 1.6417, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 5.007753076747299e-05, |
|
"loss": 1.6431, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.690254807472229, |
|
"eval_runtime": 19.1786, |
|
"eval_samples_per_second": 1539.322, |
|
"eval_steps_per_second": 9.646, |
|
"step": 306330 |
|
}, |
|
{ |
|
"epoch": 10.09, |
|
"learning_rate": 4.9588189207717175e-05, |
|
"loss": 1.6191, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"learning_rate": 4.909868442529299e-05, |
|
"loss": 1.6215, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"learning_rate": 4.8609342865537165e-05, |
|
"loss": 1.6247, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 4.8119838083112984e-05, |
|
"loss": 1.6244, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 4.76303333006888e-05, |
|
"loss": 1.6261, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"learning_rate": 4.714099174093298e-05, |
|
"loss": 1.6288, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"learning_rate": 4.66514869585088e-05, |
|
"loss": 1.6289, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 10.77, |
|
"learning_rate": 4.6162145398752984e-05, |
|
"loss": 1.6295, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 4.5672640616328796e-05, |
|
"loss": 1.6295, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 4.5183135833904614e-05, |
|
"loss": 1.6282, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 1.6800603866577148, |
|
"eval_runtime": 19.1041, |
|
"eval_samples_per_second": 1545.321, |
|
"eval_steps_per_second": 9.684, |
|
"step": 336963 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"learning_rate": 4.46937942741488e-05, |
|
"loss": 1.6134, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 11.16, |
|
"learning_rate": 4.420428949172462e-05, |
|
"loss": 1.6072, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 11.26, |
|
"learning_rate": 4.371494793196879e-05, |
|
"loss": 1.6099, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 11.36, |
|
"learning_rate": 4.322544314954461e-05, |
|
"loss": 1.6137, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 11.46, |
|
"learning_rate": 4.273610158978879e-05, |
|
"loss": 1.6136, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 11.56, |
|
"learning_rate": 4.224659680736461e-05, |
|
"loss": 1.6151, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 11.65, |
|
"learning_rate": 4.175725524760879e-05, |
|
"loss": 1.6166, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 11.75, |
|
"learning_rate": 4.126775046518461e-05, |
|
"loss": 1.6179, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"learning_rate": 4.077840890542879e-05, |
|
"loss": 1.6174, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"learning_rate": 4.0288904123004604e-05, |
|
"loss": 1.6173, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 1.6714136600494385, |
|
"eval_runtime": 19.2107, |
|
"eval_samples_per_second": 1536.747, |
|
"eval_steps_per_second": 9.63, |
|
"step": 367596 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 3.979939934058042e-05, |
|
"loss": 1.6063, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 12.14, |
|
"learning_rate": 3.931005778082461e-05, |
|
"loss": 1.5969, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 3.882055299840042e-05, |
|
"loss": 1.5998, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 12.34, |
|
"learning_rate": 3.83312114386446e-05, |
|
"loss": 1.6011, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 12.44, |
|
"learning_rate": 3.784154343355205e-05, |
|
"loss": 1.6034, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 12.54, |
|
"learning_rate": 3.7352201873796235e-05, |
|
"loss": 1.6035, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 3.686269709137205e-05, |
|
"loss": 1.6054, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"learning_rate": 3.637335553161623e-05, |
|
"loss": 1.6055, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 12.83, |
|
"learning_rate": 3.588385074919205e-05, |
|
"loss": 1.6057, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 3.539434596676787e-05, |
|
"loss": 1.6061, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 1.6634231805801392, |
|
"eval_runtime": 19.238, |
|
"eval_samples_per_second": 1534.564, |
|
"eval_steps_per_second": 9.616, |
|
"step": 398229 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 3.4905004407012046e-05, |
|
"loss": 1.5995, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 13.12, |
|
"learning_rate": 3.4415499624587865e-05, |
|
"loss": 1.5849, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 3.392615806483205e-05, |
|
"loss": 1.5894, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 13.32, |
|
"learning_rate": 3.343665328240786e-05, |
|
"loss": 1.5914, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 13.42, |
|
"learning_rate": 3.294731172265204e-05, |
|
"loss": 1.5917, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 3.245780694022786e-05, |
|
"loss": 1.5933, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 3.196830215780368e-05, |
|
"loss": 1.5926, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"learning_rate": 3.1478960598047855e-05, |
|
"loss": 1.5956, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"learning_rate": 3.0989455815623674e-05, |
|
"loss": 1.5953, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 3.0500114255867855e-05, |
|
"loss": 1.5971, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 1.6542909145355225, |
|
"eval_runtime": 19.1669, |
|
"eval_samples_per_second": 1540.259, |
|
"eval_steps_per_second": 9.652, |
|
"step": 428862 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.0010609473443674e-05, |
|
"loss": 1.5959, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 2.9521267913687855e-05, |
|
"loss": 1.5752, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 2.9031763131263674e-05, |
|
"loss": 1.5775, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 14.3, |
|
"learning_rate": 2.854225834883949e-05, |
|
"loss": 1.5814, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 2.8052916789083673e-05, |
|
"loss": 1.5814, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 2.7563412006659482e-05, |
|
"loss": 1.5837, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"learning_rate": 2.7074070446903667e-05, |
|
"loss": 1.585, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 14.69, |
|
"learning_rate": 2.6584565664479482e-05, |
|
"loss": 1.5868, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 14.79, |
|
"learning_rate": 2.60950608820553e-05, |
|
"loss": 1.5854, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"learning_rate": 2.5605719322299482e-05, |
|
"loss": 1.5864, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"learning_rate": 2.51162145398753e-05, |
|
"loss": 1.5867, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 1.6488285064697266, |
|
"eval_runtime": 19.1177, |
|
"eval_samples_per_second": 1544.224, |
|
"eval_steps_per_second": 9.677, |
|
"step": 459495 |
|
}, |
|
{ |
|
"epoch": 15.08, |
|
"learning_rate": 2.4626709757451116e-05, |
|
"loss": 1.5695, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 15.18, |
|
"learning_rate": 2.4137368197695297e-05, |
|
"loss": 1.5714, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 15.28, |
|
"learning_rate": 2.3647863415271113e-05, |
|
"loss": 1.5721, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 2.315835863284693e-05, |
|
"loss": 1.5729, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 15.47, |
|
"learning_rate": 2.2669017073091113e-05, |
|
"loss": 1.5728, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 15.57, |
|
"learning_rate": 2.2179512290666928e-05, |
|
"loss": 1.5739, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"learning_rate": 2.1690007508242746e-05, |
|
"loss": 1.5756, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 15.77, |
|
"learning_rate": 2.1200665948486928e-05, |
|
"loss": 1.5774, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 15.87, |
|
"learning_rate": 2.0711161166062743e-05, |
|
"loss": 1.5772, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 15.96, |
|
"learning_rate": 2.022165638363856e-05, |
|
"loss": 1.5781, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 1.6446890830993652, |
|
"eval_runtime": 19.0262, |
|
"eval_samples_per_second": 1551.652, |
|
"eval_steps_per_second": 9.723, |
|
"step": 490128 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"learning_rate": 1.9732314823882743e-05, |
|
"loss": 1.5652, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 16.16, |
|
"learning_rate": 1.924281004145856e-05, |
|
"loss": 1.5612, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 16.26, |
|
"learning_rate": 1.8753305259034377e-05, |
|
"loss": 1.5634, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 16.35, |
|
"learning_rate": 1.8263800476610192e-05, |
|
"loss": 1.5648, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 16.45, |
|
"learning_rate": 1.7774458916854374e-05, |
|
"loss": 1.5664, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 16.55, |
|
"learning_rate": 1.728495413443019e-05, |
|
"loss": 1.5656, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 16.65, |
|
"learning_rate": 1.679561257467437e-05, |
|
"loss": 1.5676, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 16.75, |
|
"learning_rate": 1.630610779225019e-05, |
|
"loss": 1.566, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"learning_rate": 1.5816603009826008e-05, |
|
"loss": 1.5691, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 16.94, |
|
"learning_rate": 1.5327098227401823e-05, |
|
"loss": 1.5684, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 1.6387931108474731, |
|
"eval_runtime": 19.1639, |
|
"eval_samples_per_second": 1540.501, |
|
"eval_steps_per_second": 9.654, |
|
"step": 520761 |
|
}, |
|
{ |
|
"epoch": 17.04, |
|
"learning_rate": 1.4837756667646002e-05, |
|
"loss": 1.5616, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"learning_rate": 1.434825188522182e-05, |
|
"loss": 1.5545, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 17.24, |
|
"learning_rate": 1.3858747102797636e-05, |
|
"loss": 1.5551, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 17.33, |
|
"learning_rate": 1.3369405543041818e-05, |
|
"loss": 1.5558, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 17.43, |
|
"learning_rate": 1.2879900760617636e-05, |
|
"loss": 1.5587, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 17.53, |
|
"learning_rate": 1.2390559200861816e-05, |
|
"loss": 1.5585, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 1.1901054418437633e-05, |
|
"loss": 1.5579, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 17.73, |
|
"learning_rate": 1.141154963601345e-05, |
|
"loss": 1.5586, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 17.82, |
|
"learning_rate": 1.0922208076257631e-05, |
|
"loss": 1.559, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"learning_rate": 1.0432703293833448e-05, |
|
"loss": 1.5597, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 1.6415975093841553, |
|
"eval_runtime": 19.1825, |
|
"eval_samples_per_second": 1539.008, |
|
"eval_steps_per_second": 9.644, |
|
"step": 551394 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 9.94336173407763e-06, |
|
"loss": 1.5579, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 18.12, |
|
"learning_rate": 9.453856951653447e-06, |
|
"loss": 1.5465, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 18.22, |
|
"learning_rate": 8.964352169229264e-06, |
|
"loss": 1.5491, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 18.31, |
|
"learning_rate": 8.475010609473443e-06, |
|
"loss": 1.5495, |
|
"step": 561000 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"learning_rate": 7.985505827049262e-06, |
|
"loss": 1.5498, |
|
"step": 564000 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"learning_rate": 7.496001044625078e-06, |
|
"loss": 1.5514, |
|
"step": 567000 |
|
}, |
|
{ |
|
"epoch": 18.61, |
|
"learning_rate": 7.006659484869258e-06, |
|
"loss": 1.5508, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 18.71, |
|
"learning_rate": 6.517154702445076e-06, |
|
"loss": 1.5516, |
|
"step": 573000 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 6.027649920020892e-06, |
|
"loss": 1.5508, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 18.9, |
|
"learning_rate": 5.538308360265074e-06, |
|
"loss": 1.5515, |
|
"step": 579000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 5.048803577840891e-06, |
|
"loss": 1.5521, |
|
"step": 582000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 1.6370400190353394, |
|
"eval_runtime": 19.1163, |
|
"eval_samples_per_second": 1544.337, |
|
"eval_steps_per_second": 9.678, |
|
"step": 582027 |
|
}, |
|
{ |
|
"epoch": 19.1, |
|
"learning_rate": 4.559298795416708e-06, |
|
"loss": 1.5413, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 19.19, |
|
"learning_rate": 4.069957235660889e-06, |
|
"loss": 1.5435, |
|
"step": 588000 |
|
}, |
|
{ |
|
"epoch": 19.29, |
|
"learning_rate": 3.580452453236706e-06, |
|
"loss": 1.5432, |
|
"step": 591000 |
|
}, |
|
{ |
|
"epoch": 19.39, |
|
"learning_rate": 3.091110893480887e-06, |
|
"loss": 1.5437, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 19.49, |
|
"learning_rate": 2.6016061110567034e-06, |
|
"loss": 1.5431, |
|
"step": 597000 |
|
}, |
|
{ |
|
"epoch": 19.59, |
|
"learning_rate": 2.1121013286325204e-06, |
|
"loss": 1.5431, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 19.68, |
|
"learning_rate": 1.6225965462083374e-06, |
|
"loss": 1.5441, |
|
"step": 603000 |
|
}, |
|
{ |
|
"epoch": 19.78, |
|
"learning_rate": 1.1332549864525185e-06, |
|
"loss": 1.544, |
|
"step": 606000 |
|
}, |
|
{ |
|
"epoch": 19.88, |
|
"learning_rate": 6.437502040283355e-07, |
|
"loss": 1.5469, |
|
"step": 609000 |
|
}, |
|
{ |
|
"epoch": 19.98, |
|
"learning_rate": 1.5440864427251657e-07, |
|
"loss": 1.5438, |
|
"step": 612000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 1.636548638343811, |
|
"eval_runtime": 19.1335, |
|
"eval_samples_per_second": 1542.949, |
|
"eval_steps_per_second": 9.669, |
|
"step": 612660 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 612660, |
|
"total_flos": 3.3229272051886326e+18, |
|
"train_loss": 1.7127611959194204, |
|
"train_runtime": 370998.644, |
|
"train_samples_per_second": 528.445, |
|
"train_steps_per_second": 1.651 |
|
} |
|
], |
|
"max_steps": 612660, |
|
"num_train_epochs": 20, |
|
"total_flos": 3.3229272051886326e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|