{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 30.0,
  "global_step": 47460,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.32,
      "learning_rate": 4.94732406236831e-05,
      "loss": 1.2748,
      "step": 500
    },
    {
      "epoch": 0.63,
      "learning_rate": 4.8946481247366205e-05,
      "loss": 1.2878,
      "step": 1000
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.8419721871049303e-05,
      "loss": 1.2988,
      "step": 1500
    },
    {
      "epoch": 1.26,
      "learning_rate": 4.789296249473241e-05,
      "loss": 1.2645,
      "step": 2000
    },
    {
      "epoch": 1.58,
      "learning_rate": 4.7366203118415506e-05,
      "loss": 1.2449,
      "step": 2500
    },
    {
      "epoch": 1.9,
      "learning_rate": 4.683944374209861e-05,
      "loss": 1.2536,
      "step": 3000
    },
    {
      "epoch": 2.21,
      "learning_rate": 4.631268436578171e-05,
      "loss": 1.2103,
      "step": 3500
    },
    {
      "epoch": 2.53,
      "learning_rate": 4.5785924989464814e-05,
      "loss": 1.2159,
      "step": 4000
    },
    {
      "epoch": 2.84,
      "learning_rate": 4.525916561314791e-05,
      "loss": 1.2245,
      "step": 4500
    },
    {
      "epoch": 3.16,
      "learning_rate": 4.473240623683102e-05,
      "loss": 1.1803,
      "step": 5000
    },
    {
      "epoch": 3.48,
      "learning_rate": 4.420564686051412e-05,
      "loss": 1.1739,
      "step": 5500
    },
    {
      "epoch": 3.79,
      "learning_rate": 4.367888748419722e-05,
      "loss": 1.1882,
      "step": 6000
    },
    {
      "epoch": 4.11,
      "learning_rate": 4.3152128107880325e-05,
      "loss": 1.169,
      "step": 6500
    },
    {
      "epoch": 4.42,
      "learning_rate": 4.262536873156342e-05,
      "loss": 1.1294,
      "step": 7000
    },
    {
      "epoch": 4.74,
      "learning_rate": 4.209860935524653e-05,
      "loss": 1.1658,
      "step": 7500
    },
    {
      "epoch": 5.06,
      "learning_rate": 4.1571849978929626e-05,
      "loss": 1.1395,
      "step": 8000
    },
    {
      "epoch": 5.37,
      "learning_rate": 4.104509060261273e-05,
      "loss": 1.0973,
      "step": 8500
    },
    {
      "epoch": 5.69,
      "learning_rate": 4.051833122629583e-05,
      "loss": 1.1112,
      "step": 9000
    },
    {
      "epoch": 6.01,
      "learning_rate": 3.9991571849978934e-05,
      "loss": 1.1368,
      "step": 9500
    },
    {
      "epoch": 6.32,
      "learning_rate": 3.946481247366203e-05,
      "loss": 1.0661,
      "step": 10000
    },
    {
      "epoch": 6.64,
      "learning_rate": 3.893805309734514e-05,
      "loss": 1.0824,
      "step": 10500
    },
    {
      "epoch": 6.95,
      "learning_rate": 3.8411293721028235e-05,
      "loss": 1.1049,
      "step": 11000
    },
    {
      "epoch": 7.27,
      "learning_rate": 3.788453434471134e-05,
      "loss": 1.0503,
      "step": 11500
    },
    {
      "epoch": 7.59,
      "learning_rate": 3.735777496839444e-05,
      "loss": 1.0634,
      "step": 12000
    },
    {
      "epoch": 7.9,
      "learning_rate": 3.6831015592077536e-05,
      "loss": 1.0592,
      "step": 12500
    },
    {
      "epoch": 8.22,
      "learning_rate": 3.630425621576064e-05,
      "loss": 1.0401,
      "step": 13000
    },
    {
      "epoch": 8.53,
      "learning_rate": 3.577749683944374e-05,
      "loss": 1.0138,
      "step": 13500
    },
    {
      "epoch": 8.85,
      "learning_rate": 3.5250737463126844e-05,
      "loss": 1.0445,
      "step": 14000
    },
    {
      "epoch": 9.17,
      "learning_rate": 3.472397808680995e-05,
      "loss": 1.0185,
      "step": 14500
    },
    {
      "epoch": 9.48,
      "learning_rate": 3.419721871049305e-05,
      "loss": 0.9941,
      "step": 15000
    },
    {
      "epoch": 9.8,
      "learning_rate": 3.367045933417615e-05,
      "loss": 1.0206,
      "step": 15500
    },
    {
      "epoch": 10.11,
      "learning_rate": 3.3143699957859256e-05,
      "loss": 1.0188,
      "step": 16000
    },
    {
      "epoch": 10.43,
      "learning_rate": 3.2616940581542354e-05,
      "loss": 0.972,
      "step": 16500
    },
    {
      "epoch": 10.75,
      "learning_rate": 3.209018120522546e-05,
      "loss": 0.9909,
      "step": 17000
    },
    {
      "epoch": 11.06,
      "learning_rate": 3.156342182890856e-05,
      "loss": 0.9792,
      "step": 17500
    },
    {
      "epoch": 11.38,
      "learning_rate": 3.1036662452591655e-05,
      "loss": 0.9532,
      "step": 18000
    },
    {
      "epoch": 11.69,
      "learning_rate": 3.050990307627476e-05,
      "loss": 0.9616,
      "step": 18500
    },
    {
      "epoch": 12.01,
      "learning_rate": 2.998314369995786e-05,
      "loss": 0.9773,
      "step": 19000
    },
    {
      "epoch": 12.33,
      "learning_rate": 2.9456384323640963e-05,
      "loss": 0.9291,
      "step": 19500
    },
    {
      "epoch": 12.64,
      "learning_rate": 2.8929624947324065e-05,
      "loss": 0.9526,
      "step": 20000
    },
    {
      "epoch": 12.96,
      "learning_rate": 2.8402865571007166e-05,
      "loss": 0.9516,
      "step": 20500
    },
    {
      "epoch": 13.27,
      "learning_rate": 2.7876106194690264e-05,
      "loss": 0.9112,
      "step": 21000
    },
    {
      "epoch": 13.59,
      "learning_rate": 2.7349346818373366e-05,
      "loss": 0.9198,
      "step": 21500
    },
    {
      "epoch": 13.91,
      "learning_rate": 2.6822587442056467e-05,
      "loss": 0.9436,
      "step": 22000
    },
    {
      "epoch": 14.22,
      "learning_rate": 2.629582806573957e-05,
      "loss": 0.9024,
      "step": 22500
    },
    {
      "epoch": 14.54,
      "learning_rate": 2.576906868942267e-05,
      "loss": 0.9076,
      "step": 23000
    },
    {
      "epoch": 14.85,
      "learning_rate": 2.524230931310577e-05,
      "loss": 0.9142,
      "step": 23500
    },
    {
      "epoch": 15.17,
      "learning_rate": 2.4715549936788876e-05,
      "loss": 0.9001,
      "step": 24000
    },
    {
      "epoch": 15.49,
      "learning_rate": 2.4188790560471978e-05,
      "loss": 0.8884,
      "step": 24500
    },
    {
      "epoch": 15.8,
      "learning_rate": 2.366203118415508e-05,
      "loss": 0.8909,
      "step": 25000
    },
    {
      "epoch": 16.12,
      "learning_rate": 2.313527180783818e-05,
      "loss": 0.8959,
      "step": 25500
    },
    {
      "epoch": 16.43,
      "learning_rate": 2.2608512431521282e-05,
      "loss": 0.8557,
      "step": 26000
    },
    {
      "epoch": 16.75,
      "learning_rate": 2.2081753055204384e-05,
      "loss": 0.8859,
      "step": 26500
    },
    {
      "epoch": 17.07,
      "learning_rate": 2.1554993678887485e-05,
      "loss": 0.8828,
      "step": 27000
    },
    {
      "epoch": 17.38,
      "learning_rate": 2.1028234302570587e-05,
      "loss": 0.8477,
      "step": 27500
    },
    {
      "epoch": 17.7,
      "learning_rate": 2.0501474926253688e-05,
      "loss": 0.8648,
      "step": 28000
    },
    {
      "epoch": 18.02,
      "learning_rate": 1.997471554993679e-05,
      "loss": 0.8785,
      "step": 28500
    },
    {
      "epoch": 18.33,
      "learning_rate": 1.944795617361989e-05,
      "loss": 0.8449,
      "step": 29000
    },
    {
      "epoch": 18.65,
      "learning_rate": 1.8921196797302992e-05,
      "loss": 0.8559,
      "step": 29500
    },
    {
      "epoch": 18.96,
      "learning_rate": 1.8394437420986094e-05,
      "loss": 0.8546,
      "step": 30000
    },
    {
      "epoch": 19.28,
      "learning_rate": 1.7867678044669195e-05,
      "loss": 0.8199,
      "step": 30500
    },
    {
      "epoch": 19.6,
      "learning_rate": 1.7340918668352297e-05,
      "loss": 0.8308,
      "step": 31000
    },
    {
      "epoch": 19.91,
      "learning_rate": 1.6814159292035402e-05,
      "loss": 0.8645,
      "step": 31500
    },
    {
      "epoch": 20.23,
      "learning_rate": 1.62873999157185e-05,
      "loss": 0.8307,
      "step": 32000
    },
    {
      "epoch": 20.54,
      "learning_rate": 1.57606405394016e-05,
      "loss": 0.8243,
      "step": 32500
    },
    {
      "epoch": 20.86,
      "learning_rate": 1.5233881163084704e-05,
      "loss": 0.8225,
      "step": 33000
    },
    {
      "epoch": 21.18,
      "learning_rate": 1.4707121786767806e-05,
      "loss": 0.8214,
      "step": 33500
    },
    {
      "epoch": 21.49,
      "learning_rate": 1.4180362410450906e-05,
      "loss": 0.8093,
      "step": 34000
    },
    {
      "epoch": 21.81,
      "learning_rate": 1.3653603034134007e-05,
      "loss": 0.817,
      "step": 34500
    },
    {
      "epoch": 22.12,
      "learning_rate": 1.3126843657817109e-05,
      "loss": 0.8373,
      "step": 35000
    },
    {
      "epoch": 22.44,
      "learning_rate": 1.260008428150021e-05,
      "loss": 0.8028,
      "step": 35500
    },
    {
      "epoch": 22.76,
      "learning_rate": 1.2073324905183313e-05,
      "loss": 0.8156,
      "step": 36000
    },
    {
      "epoch": 23.07,
      "learning_rate": 1.1546565528866415e-05,
      "loss": 0.8105,
      "step": 36500
    },
    {
      "epoch": 23.39,
      "learning_rate": 1.1019806152549515e-05,
      "loss": 0.8085,
      "step": 37000
    },
    {
      "epoch": 23.7,
      "learning_rate": 1.0493046776232618e-05,
      "loss": 0.8071,
      "step": 37500
    },
    {
      "epoch": 24.02,
      "learning_rate": 9.96628739991572e-06,
      "loss": 0.7915,
      "step": 38000
    },
    {
      "epoch": 24.34,
      "learning_rate": 9.43952802359882e-06,
      "loss": 0.7973,
      "step": 38500
    },
    {
      "epoch": 24.65,
      "learning_rate": 8.912768647281922e-06,
      "loss": 0.7963,
      "step": 39000
    },
    {
      "epoch": 24.97,
      "learning_rate": 8.386009270965024e-06,
      "loss": 0.7947,
      "step": 39500
    },
    {
      "epoch": 25.28,
      "learning_rate": 7.859249894648125e-06,
      "loss": 0.7929,
      "step": 40000
    },
    {
      "epoch": 25.6,
      "learning_rate": 7.3324905183312265e-06,
      "loss": 0.794,
      "step": 40500
    },
    {
      "epoch": 25.92,
      "learning_rate": 6.805731142014328e-06,
      "loss": 0.7901,
      "step": 41000
    },
    {
      "epoch": 26.23,
      "learning_rate": 6.2789717656974295e-06,
      "loss": 0.7732,
      "step": 41500
    },
    {
      "epoch": 26.55,
      "learning_rate": 5.752212389380531e-06,
      "loss": 0.7862,
      "step": 42000
    },
    {
      "epoch": 26.86,
      "learning_rate": 5.225453013063633e-06,
      "loss": 0.7973,
      "step": 42500
    },
    {
      "epoch": 27.18,
      "learning_rate": 4.698693636746735e-06,
      "loss": 0.7813,
      "step": 43000
    },
    {
      "epoch": 27.5,
      "learning_rate": 4.171934260429835e-06,
      "loss": 0.7834,
      "step": 43500
    },
    {
      "epoch": 27.81,
      "learning_rate": 3.6451748841129377e-06,
      "loss": 0.785,
      "step": 44000
    },
    {
      "epoch": 28.13,
      "learning_rate": 3.1184155077960387e-06,
      "loss": 0.7904,
      "step": 44500
    },
    {
      "epoch": 28.45,
      "learning_rate": 2.59165613147914e-06,
      "loss": 0.7809,
      "step": 45000
    },
    {
      "epoch": 28.76,
      "learning_rate": 2.064896755162242e-06,
      "loss": 0.7804,
      "step": 45500
    },
    {
      "epoch": 29.08,
      "learning_rate": 1.5381373788453435e-06,
      "loss": 0.7755,
      "step": 46000
    },
    {
      "epoch": 29.39,
      "learning_rate": 1.0113780025284452e-06,
      "loss": 0.7789,
      "step": 46500
    },
    {
      "epoch": 29.71,
      "learning_rate": 4.846186262115466e-07,
      "loss": 0.7694,
      "step": 47000
    },
    {
      "epoch": 30.0,
      "step": 47460,
      "total_flos": 4.958800183296e+16,
      "train_loss": 0.9465416105698796,
      "train_runtime": 17544.8246,
      "train_samples_per_second": 5.408,
      "train_steps_per_second": 2.705
    }
  ],
  "max_steps": 47460,
  "num_train_epochs": 30,
  "total_flos": 4.958800183296e+16,
  "trial_name": null,
  "trial_params": null
}