|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.13131545254587834, |
|
"eval_steps": 1000, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0065657726272939166, |
|
"grad_norm": 0.20985035598278046, |
|
"learning_rate": 1.9875205254515602e-05, |
|
"loss": 1.2854, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0065657726272939166, |
|
"eval_loss": 1.0137826204299927, |
|
"eval_runtime": 30083.3012, |
|
"eval_samples_per_second": 0.426, |
|
"eval_steps_per_second": 0.053, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.013131545254587833, |
|
"grad_norm": 0.25870802998542786, |
|
"learning_rate": 1.9743842364532024e-05, |
|
"loss": 0.9595, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.013131545254587833, |
|
"eval_loss": 0.9594874382019043, |
|
"eval_runtime": 30081.6328, |
|
"eval_samples_per_second": 0.426, |
|
"eval_steps_per_second": 0.053, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01969731788188175, |
|
"grad_norm": 0.22705765068531036, |
|
"learning_rate": 1.961247947454844e-05, |
|
"loss": 0.9371, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.01969731788188175, |
|
"eval_loss": 0.943973183631897, |
|
"eval_runtime": 30082.9648, |
|
"eval_samples_per_second": 0.426, |
|
"eval_steps_per_second": 0.053, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.026263090509175666, |
|
"grad_norm": 0.15589605271816254, |
|
"learning_rate": 1.9481116584564863e-05, |
|
"loss": 0.9386, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.026263090509175666, |
|
"eval_loss": 0.9333989024162292, |
|
"eval_runtime": 30081.8754, |
|
"eval_samples_per_second": 0.426, |
|
"eval_steps_per_second": 0.053, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.032828863136469585, |
|
"grad_norm": 0.3671300411224365, |
|
"learning_rate": 1.934975369458128e-05, |
|
"loss": 0.9166, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.032828863136469585, |
|
"eval_loss": 0.926673173904419, |
|
"eval_runtime": 30080.758, |
|
"eval_samples_per_second": 0.426, |
|
"eval_steps_per_second": 0.053, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0393946357637635, |
|
"grad_norm": 0.23869025707244873, |
|
"learning_rate": 1.9218390804597703e-05, |
|
"loss": 0.9236, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.0393946357637635, |
|
"eval_loss": 0.9226961135864258, |
|
"eval_runtime": 30048.7574, |
|
"eval_samples_per_second": 0.427, |
|
"eval_steps_per_second": 0.053, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.04596040839105742, |
|
"grad_norm": 0.27560362219810486, |
|
"learning_rate": 1.9087027914614124e-05, |
|
"loss": 0.9083, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.04596040839105742, |
|
"eval_loss": 0.9187578558921814, |
|
"eval_runtime": 30068.8429, |
|
"eval_samples_per_second": 0.427, |
|
"eval_steps_per_second": 0.053, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.05252618101835133, |
|
"grad_norm": 0.2736820578575134, |
|
"learning_rate": 1.8955665024630542e-05, |
|
"loss": 0.929, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.05252618101835133, |
|
"eval_loss": 0.9152230620384216, |
|
"eval_runtime": 30083.9319, |
|
"eval_samples_per_second": 0.426, |
|
"eval_steps_per_second": 0.053, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.05909195364564525, |
|
"grad_norm": 0.2768559455871582, |
|
"learning_rate": 1.8824302134646964e-05, |
|
"loss": 0.9077, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.05909195364564525, |
|
"eval_loss": 0.9131888747215271, |
|
"eval_runtime": 30078.968, |
|
"eval_samples_per_second": 0.426, |
|
"eval_steps_per_second": 0.053, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.06565772627293917, |
|
"grad_norm": 0.29504087567329407, |
|
"learning_rate": 1.8692939244663385e-05, |
|
"loss": 0.9018, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06565772627293917, |
|
"eval_loss": 0.9111798405647278, |
|
"eval_runtime": 30085.2411, |
|
"eval_samples_per_second": 0.426, |
|
"eval_steps_per_second": 0.053, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07222349890023308, |
|
"grad_norm": 0.32929274439811707, |
|
"learning_rate": 1.8561576354679803e-05, |
|
"loss": 0.9113, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.07222349890023308, |
|
"eval_loss": 0.9082886576652527, |
|
"eval_runtime": 30080.1756, |
|
"eval_samples_per_second": 0.426, |
|
"eval_steps_per_second": 0.053, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.078789271527527, |
|
"grad_norm": 0.3265211582183838, |
|
"learning_rate": 1.8430213464696225e-05, |
|
"loss": 0.8797, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.078789271527527, |
|
"eval_loss": 0.9068158864974976, |
|
"eval_runtime": 30079.3778, |
|
"eval_samples_per_second": 0.426, |
|
"eval_steps_per_second": 0.053, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.08535504415482092, |
|
"grad_norm": 0.338413268327713, |
|
"learning_rate": 1.8298850574712646e-05, |
|
"loss": 0.9187, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.08535504415482092, |
|
"eval_loss": 0.9051068425178528, |
|
"eval_runtime": 30077.7335, |
|
"eval_samples_per_second": 0.426, |
|
"eval_steps_per_second": 0.053, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.09192081678211483, |
|
"grad_norm": 0.28864777088165283, |
|
"learning_rate": 1.8167487684729067e-05, |
|
"loss": 0.893, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.09192081678211483, |
|
"eval_loss": 0.9037203192710876, |
|
"eval_runtime": 30076.794, |
|
"eval_samples_per_second": 0.426, |
|
"eval_steps_per_second": 0.053, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.09848658940940876, |
|
"grad_norm": 0.3874276578426361, |
|
"learning_rate": 1.8036124794745485e-05, |
|
"loss": 0.897, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09848658940940876, |
|
"eval_loss": 0.9024273753166199, |
|
"eval_runtime": 30075.0565, |
|
"eval_samples_per_second": 0.426, |
|
"eval_steps_per_second": 0.053, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.10505236203670267, |
|
"grad_norm": 0.395245760679245, |
|
"learning_rate": 1.7904761904761907e-05, |
|
"loss": 0.8966, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.10505236203670267, |
|
"eval_loss": 0.9006879329681396, |
|
"eval_runtime": 30076.5102, |
|
"eval_samples_per_second": 0.426, |
|
"eval_steps_per_second": 0.053, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.11161813466399659, |
|
"grad_norm": 0.36763593554496765, |
|
"learning_rate": 1.777339901477833e-05, |
|
"loss": 0.9013, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.11161813466399659, |
|
"eval_loss": 0.8995980620384216, |
|
"eval_runtime": 30075.5407, |
|
"eval_samples_per_second": 0.426, |
|
"eval_steps_per_second": 0.053, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.1181839072912905, |
|
"grad_norm": 0.3429044783115387, |
|
"learning_rate": 1.7642036124794746e-05, |
|
"loss": 0.887, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.1181839072912905, |
|
"eval_loss": 0.8988845944404602, |
|
"eval_runtime": 30075.9991, |
|
"eval_samples_per_second": 0.426, |
|
"eval_steps_per_second": 0.053, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.12474967991858442, |
|
"grad_norm": 0.41019654273986816, |
|
"learning_rate": 1.7510673234811168e-05, |
|
"loss": 0.8988, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.12474967991858442, |
|
"eval_loss": 0.8975555300712585, |
|
"eval_runtime": 30071.8456, |
|
"eval_samples_per_second": 0.427, |
|
"eval_steps_per_second": 0.053, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.13131545254587834, |
|
"grad_norm": 0.36183497309684753, |
|
"learning_rate": 1.7379310344827586e-05, |
|
"loss": 0.8733, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.13131545254587834, |
|
"eval_loss": 0.8967778086662292, |
|
"eval_runtime": 30071.3584, |
|
"eval_samples_per_second": 0.427, |
|
"eval_steps_per_second": 0.053, |
|
"step": 2000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 15230, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0606252120788173e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|