|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 49.31506849315068, |
|
"eval_steps": 400, |
|
"global_step": 14400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.7123287671232876, |
|
"grad_norm": 2.024919271469116, |
|
"learning_rate": 5e-05, |
|
"loss": 3.8492, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.4246575342465753, |
|
"grad_norm": 1.7762272357940674, |
|
"learning_rate": 4.6973365617433416e-05, |
|
"loss": 3.104, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.136986301369863, |
|
"grad_norm": 1.9453787803649902, |
|
"learning_rate": 4.394673123486683e-05, |
|
"loss": 2.7752, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.8493150684931505, |
|
"grad_norm": 1.9978324174880981, |
|
"learning_rate": 4.092009685230024e-05, |
|
"loss": 2.5478, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.561643835616438, |
|
"grad_norm": 1.8244125843048096, |
|
"learning_rate": 3.789346246973366e-05, |
|
"loss": 2.3579, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.273972602739725, |
|
"grad_norm": 2.0865862369537354, |
|
"learning_rate": 3.486682808716707e-05, |
|
"loss": 2.2151, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.986301369863014, |
|
"grad_norm": 2.022252321243286, |
|
"learning_rate": 3.184019370460048e-05, |
|
"loss": 2.0783, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.698630136986301, |
|
"grad_norm": 2.030592918395996, |
|
"learning_rate": 2.88135593220339e-05, |
|
"loss": 1.9546, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.41095890410959, |
|
"grad_norm": 2.101133346557617, |
|
"learning_rate": 2.5786924939467316e-05, |
|
"loss": 1.8636, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 17.123287671232877, |
|
"grad_norm": 2.0378260612487793, |
|
"learning_rate": 2.2760290556900726e-05, |
|
"loss": 1.7814, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 18.835616438356166, |
|
"grad_norm": 2.2219014167785645, |
|
"learning_rate": 1.9733656174334143e-05, |
|
"loss": 1.7052, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 20.54794520547945, |
|
"grad_norm": 2.211897373199463, |
|
"learning_rate": 1.6707021791767556e-05, |
|
"loss": 1.6337, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 22.26027397260274, |
|
"grad_norm": 2.0061228275299072, |
|
"learning_rate": 1.3680387409200971e-05, |
|
"loss": 1.5842, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 23.972602739726028, |
|
"grad_norm": 2.1824605464935303, |
|
"learning_rate": 1.0653753026634383e-05, |
|
"loss": 1.544, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 25.684931506849313, |
|
"grad_norm": 2.1209356784820557, |
|
"learning_rate": 7.627118644067798e-06, |
|
"loss": 1.5001, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 27.397260273972602, |
|
"grad_norm": 2.1307101249694824, |
|
"learning_rate": 4.600484261501211e-06, |
|
"loss": 1.4742, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 29.10958904109589, |
|
"grad_norm": 2.0766286849975586, |
|
"learning_rate": 1.5738498789346248e-06, |
|
"loss": 1.4553, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 30.82191780821918, |
|
"grad_norm": 2.4918978214263916, |
|
"learning_rate": 1.9858156028368796e-05, |
|
"loss": 1.6283, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 32.534246575342465, |
|
"grad_norm": 2.147444725036621, |
|
"learning_rate": 1.8085106382978724e-05, |
|
"loss": 1.5809, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 34.24657534246575, |
|
"grad_norm": 2.2365379333496094, |
|
"learning_rate": 1.6312056737588656e-05, |
|
"loss": 1.5219, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 35.95890410958904, |
|
"grad_norm": 2.2686374187469482, |
|
"learning_rate": 1.4539007092198581e-05, |
|
"loss": 1.4767, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 37.67123287671233, |
|
"grad_norm": 2.2740468978881836, |
|
"learning_rate": 1.2765957446808511e-05, |
|
"loss": 1.4248, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 39.38356164383562, |
|
"grad_norm": 2.060163736343384, |
|
"learning_rate": 1.0992907801418441e-05, |
|
"loss": 1.3853, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 41.0958904109589, |
|
"grad_norm": 2.2763679027557373, |
|
"learning_rate": 9.219858156028368e-06, |
|
"loss": 1.3555, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 42.80821917808219, |
|
"grad_norm": 2.2008297443389893, |
|
"learning_rate": 7.446808510638298e-06, |
|
"loss": 1.3242, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 44.52054794520548, |
|
"grad_norm": 2.5200419425964355, |
|
"learning_rate": 5.673758865248227e-06, |
|
"loss": 1.3008, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 46.23287671232877, |
|
"grad_norm": 2.2964625358581543, |
|
"learning_rate": 3.9007092198581565e-06, |
|
"loss": 1.2794, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 47.945205479452056, |
|
"grad_norm": 2.3451294898986816, |
|
"learning_rate": 2.1276595744680853e-06, |
|
"loss": 1.2691, |
|
"step": 14000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 14600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.0045885421584384e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|