|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 2277, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04391743522178305, |
|
"grad_norm": 0.08186139911413193, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 2.3443, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0878348704435661, |
|
"grad_norm": 0.18966266512870789, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 2.2978, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13175230566534915, |
|
"grad_norm": 0.36546170711517334, |
|
"learning_rate": 2e-05, |
|
"loss": 2.2561, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1756697408871322, |
|
"grad_norm": 0.4150899052619934, |
|
"learning_rate": 1.987400818332026e-05, |
|
"loss": 2.1827, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21958717610891523, |
|
"grad_norm": 0.5007146000862122, |
|
"learning_rate": 1.9499207520855085e-05, |
|
"loss": 2.1245, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2635046113306983, |
|
"grad_norm": 0.6546090245246887, |
|
"learning_rate": 1.8885042375875825e-05, |
|
"loss": 2.1053, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.30742204655248134, |
|
"grad_norm": 0.708943247795105, |
|
"learning_rate": 1.8046988704853946e-05, |
|
"loss": 2.0842, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3513394817742644, |
|
"grad_norm": 0.5875030159950256, |
|
"learning_rate": 1.700616408868689e-05, |
|
"loss": 2.0717, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.3952569169960474, |
|
"grad_norm": 0.60301673412323, |
|
"learning_rate": 1.578879560422182e-05, |
|
"loss": 2.0419, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.43917435221783047, |
|
"grad_norm": 0.5948824882507324, |
|
"learning_rate": 1.4425558944844027e-05, |
|
"loss": 2.0202, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4830917874396135, |
|
"grad_norm": 0.7956698536872864, |
|
"learning_rate": 1.29508054432094e-05, |
|
"loss": 2.0113, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5270092226613966, |
|
"grad_norm": 0.7147735953330994, |
|
"learning_rate": 1.1401696473883086e-05, |
|
"loss": 1.9828, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5709266578831796, |
|
"grad_norm": 0.8019099235534668, |
|
"learning_rate": 9.81726704752115e-06, |
|
"loss": 1.9665, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6148440931049627, |
|
"grad_norm": 0.7160748243331909, |
|
"learning_rate": 8.237442192489225e-06, |
|
"loss": 2.0038, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.6587615283267457, |
|
"grad_norm": 0.7949112057685852, |
|
"learning_rate": 6.70203090949157e-06, |
|
"loss": 1.977, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7026789635485288, |
|
"grad_norm": 0.978585958480835, |
|
"learning_rate": 5.249723049907276e-06, |
|
"loss": 1.959, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.7465963987703118, |
|
"grad_norm": 1.2144778966903687, |
|
"learning_rate": 3.917114394857796e-06, |
|
"loss": 1.9782, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.7905138339920948, |
|
"grad_norm": 0.7180888652801514, |
|
"learning_rate": 2.737784501417702e-06, |
|
"loss": 1.95, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.8344312692138779, |
|
"grad_norm": 0.7736744284629822, |
|
"learning_rate": 1.7414505527348436e-06, |
|
"loss": 1.9551, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.8783487044356609, |
|
"grad_norm": 0.7984046936035156, |
|
"learning_rate": 9.532185336520706e-07, |
|
"loss": 1.9713, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.922266139657444, |
|
"grad_norm": 0.9639809727668762, |
|
"learning_rate": 3.92950600979255e-07, |
|
"loss": 1.9249, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.966183574879227, |
|
"grad_norm": 0.799350917339325, |
|
"learning_rate": 7.476458964937316e-08, |
|
"loss": 1.9758, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2277, |
|
"total_flos": 4.13804323012608e+16, |
|
"train_loss": 2.0515993006877538, |
|
"train_runtime": 708.674, |
|
"train_samples_per_second": 6.426, |
|
"train_steps_per_second": 3.213 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2277, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.13804323012608e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|