{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 21.693934737413,
  "global_step": 30000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.36,
      "learning_rate": 0.0002777777777777778,
      "loss": 7.7201,
      "step": 500
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.0005555555555555556,
      "loss": 6.4428,
      "step": 1000
    },
    {
      "epoch": 1.08,
      "learning_rate": 0.0008333333333333334,
      "loss": 5.9072,
      "step": 1500
    },
    {
      "epoch": 1.45,
      "learning_rate": 0.0009929078014184398,
      "loss": 3.7123,
      "step": 2000
    },
    {
      "epoch": 1.81,
      "learning_rate": 0.000975177304964539,
      "loss": 2.6271,
      "step": 2500
    },
    {
      "epoch": 2.17,
      "learning_rate": 0.0009574468085106384,
      "loss": 2.2823,
      "step": 3000
    },
    {
      "epoch": 2.53,
      "learning_rate": 0.0009397163120567376,
      "loss": 2.1231,
      "step": 3500
    },
    {
      "epoch": 2.89,
      "learning_rate": 0.0009219858156028368,
      "loss": 2.0297,
      "step": 4000
    },
    {
      "epoch": 3.25,
      "learning_rate": 0.0009042553191489362,
      "loss": 1.9652,
      "step": 4500
    },
    {
      "epoch": 3.62,
      "learning_rate": 0.0008865248226950354,
      "loss": 1.9186,
      "step": 5000
    },
    {
      "epoch": 3.98,
      "learning_rate": 0.0008687943262411348,
      "loss": 1.8837,
      "step": 5500
    },
    {
      "epoch": 4.34,
      "learning_rate": 0.000851063829787234,
      "loss": 1.8546,
      "step": 6000
    },
    {
      "epoch": 4.7,
      "learning_rate": 0.0008333333333333334,
      "loss": 1.8322,
      "step": 6500
    },
    {
      "epoch": 5.06,
      "learning_rate": 0.0008156028368794326,
      "loss": 1.8122,
      "step": 7000
    },
    {
      "epoch": 5.42,
      "learning_rate": 0.0007978723404255319,
      "loss": 1.7951,
      "step": 7500
    },
    {
      "epoch": 5.79,
      "learning_rate": 0.0007801418439716312,
      "loss": 1.7814,
      "step": 8000
    },
    {
      "epoch": 6.15,
      "learning_rate": 0.0007624113475177306,
      "loss": 1.7679,
      "step": 8500
    },
    {
      "epoch": 6.51,
      "learning_rate": 0.0007446808510638298,
      "loss": 1.756,
      "step": 9000
    },
    {
      "epoch": 6.87,
      "learning_rate": 0.0007269503546099291,
      "loss": 1.7466,
      "step": 9500
    },
    {
      "epoch": 7.23,
      "learning_rate": 0.0007092198581560284,
      "loss": 1.7355,
      "step": 10000
    },
    {
      "epoch": 7.59,
      "learning_rate": 0.0006914893617021278,
      "loss": 1.7268,
      "step": 10500
    },
    {
      "epoch": 7.95,
      "learning_rate": 0.0006737588652482269,
      "loss": 1.7197,
      "step": 11000
    },
    {
      "epoch": 8.32,
      "learning_rate": 0.0006560283687943263,
      "loss": 1.7111,
      "step": 11500
    },
    {
      "epoch": 8.68,
      "learning_rate": 0.0006382978723404256,
      "loss": 1.7043,
      "step": 12000
    },
    {
      "epoch": 9.04,
      "learning_rate": 0.0006205673758865247,
      "loss": 1.6979,
      "step": 12500
    },
    {
      "epoch": 9.4,
      "learning_rate": 0.0006028368794326241,
      "loss": 1.6909,
      "step": 13000
    },
    {
      "epoch": 9.76,
      "learning_rate": 0.0005851063829787234,
      "loss": 1.6856,
      "step": 13500
    },
    {
      "epoch": 10.12,
      "learning_rate": 0.0005673758865248228,
      "loss": 1.6796,
      "step": 14000
    },
    {
      "epoch": 10.49,
      "learning_rate": 0.0005496453900709219,
      "loss": 1.6744,
      "step": 14500
    },
    {
      "epoch": 10.85,
      "learning_rate": 0.0005319148936170213,
      "loss": 1.6699,
      "step": 15000
    },
    {
      "epoch": 11.21,
      "learning_rate": 0.0005141843971631206,
      "loss": 1.6648,
      "step": 15500
    },
    {
      "epoch": 11.57,
      "learning_rate": 0.0004964539007092199,
      "loss": 1.6595,
      "step": 16000
    },
    {
      "epoch": 11.93,
      "learning_rate": 0.0004787234042553192,
      "loss": 1.6553,
      "step": 16500
    },
    {
      "epoch": 12.29,
      "learning_rate": 0.0004609929078014184,
      "loss": 1.6511,
      "step": 17000
    },
    {
      "epoch": 12.65,
      "learning_rate": 0.0004432624113475177,
      "loss": 1.6466,
      "step": 17500
    },
    {
      "epoch": 13.02,
      "learning_rate": 0.000425531914893617,
      "loss": 1.6431,
      "step": 18000
    },
    {
      "epoch": 13.38,
      "learning_rate": 0.0004078014184397163,
      "loss": 1.6386,
      "step": 18500
    },
    {
      "epoch": 13.74,
      "learning_rate": 0.0003900709219858156,
      "loss": 1.6353,
      "step": 19000
    },
    {
      "epoch": 14.1,
      "learning_rate": 0.0003723404255319149,
      "loss": 1.6307,
      "step": 19500
    },
    {
      "epoch": 14.46,
      "learning_rate": 0.0003546099290780142,
      "loss": 1.6279,
      "step": 20000
    },
    {
      "epoch": 14.82,
      "learning_rate": 0.00033687943262411345,
      "loss": 1.6241,
      "step": 20500
    },
    {
      "epoch": 15.19,
      "learning_rate": 0.0003191489361702128,
      "loss": 1.6215,
      "step": 21000
    },
    {
      "epoch": 15.55,
      "learning_rate": 0.00030141843971631205,
      "loss": 1.6178,
      "step": 21500
    },
    {
      "epoch": 15.91,
      "learning_rate": 0.0002836879432624114,
      "loss": 1.6149,
      "step": 22000
    },
    {
      "epoch": 16.27,
      "learning_rate": 0.00026595744680851064,
      "loss": 1.6118,
      "step": 22500
    },
    {
      "epoch": 16.63,
      "learning_rate": 0.00024822695035460994,
      "loss": 1.6086,
      "step": 23000
    },
    {
      "epoch": 16.99,
      "learning_rate": 0.0002304964539007092,
      "loss": 1.6064,
      "step": 23500
    },
    {
      "epoch": 17.36,
      "learning_rate": 0.0002127659574468085,
      "loss": 1.6028,
      "step": 24000
    },
    {
      "epoch": 17.72,
      "learning_rate": 0.0001950354609929078,
      "loss": 1.5994,
      "step": 24500
    },
    {
      "epoch": 18.08,
      "learning_rate": 0.0001773049645390071,
      "loss": 1.5971,
      "step": 25000
    },
    {
      "epoch": 18.44,
      "learning_rate": 0.0001595744680851064,
      "loss": 1.5938,
      "step": 25500
    },
    {
      "epoch": 18.8,
      "learning_rate": 0.0001418439716312057,
      "loss": 1.5922,
      "step": 26000
    },
    {
      "epoch": 19.16,
      "learning_rate": 0.00012411347517730497,
      "loss": 1.589,
      "step": 26500
    },
    {
      "epoch": 19.52,
      "learning_rate": 0.00010638297872340425,
      "loss": 1.5867,
      "step": 27000
    },
    {
      "epoch": 19.89,
      "learning_rate": 8.865248226950355e-05,
      "loss": 1.5839,
      "step": 27500
    },
    {
      "epoch": 20.25,
      "learning_rate": 7.092198581560285e-05,
      "loss": 1.5811,
      "step": 28000
    },
    {
      "epoch": 20.61,
      "learning_rate": 5.319148936170213e-05,
      "loss": 1.5795,
      "step": 28500
    },
    {
      "epoch": 20.97,
      "learning_rate": 3.5460992907801425e-05,
      "loss": 1.577,
      "step": 29000
    },
    {
      "epoch": 21.33,
      "learning_rate": 1.7730496453900712e-05,
      "loss": 1.575,
      "step": 29500
    },
    {
      "epoch": 21.69,
      "learning_rate": 0.0,
      "loss": 1.5733,
      "step": 30000
    }
  ],
  "max_steps": 30000,
  "num_train_epochs": 22,
  "total_flos": 1.9277555263183258e+19,
  "trial_name": null,
  "trial_params": null
}