|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.7001983895437041, |
|
"eval_steps": 250, |
|
"global_step": 1500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11669973159061735, |
|
"grad_norm": 0.6994202136993408, |
|
"learning_rate": 9.805520181836609e-05, |
|
"loss": 0.478, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.11669973159061735, |
|
"eval_loss": 0.251759797334671, |
|
"eval_runtime": 31.9012, |
|
"eval_samples_per_second": 5.454, |
|
"eval_steps_per_second": 5.454, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2333994631812347, |
|
"grad_norm": 0.49493369460105896, |
|
"learning_rate": 8.956192175374174e-05, |
|
"loss": 0.1602, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2333994631812347, |
|
"eval_loss": 0.13994424045085907, |
|
"eval_runtime": 31.9941, |
|
"eval_samples_per_second": 5.438, |
|
"eval_steps_per_second": 5.438, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.35009919477185203, |
|
"grad_norm": 0.4915720224380493, |
|
"learning_rate": 7.547875537973998e-05, |
|
"loss": 0.1159, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.35009919477185203, |
|
"eval_loss": 0.11492624133825302, |
|
"eval_runtime": 31.9748, |
|
"eval_samples_per_second": 5.442, |
|
"eval_steps_per_second": 5.442, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4667989263624694, |
|
"grad_norm": 0.3786638677120209, |
|
"learning_rate": 5.779557819828257e-05, |
|
"loss": 0.1003, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4667989263624694, |
|
"eval_loss": 0.09217362850904465, |
|
"eval_runtime": 31.8976, |
|
"eval_samples_per_second": 5.455, |
|
"eval_steps_per_second": 5.455, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5834986579530868, |
|
"grad_norm": 0.5179420113563538, |
|
"learning_rate": 3.901092783472074e-05, |
|
"loss": 0.0885, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.5834986579530868, |
|
"eval_loss": 0.08533080667257309, |
|
"eval_runtime": 31.9485, |
|
"eval_samples_per_second": 5.446, |
|
"eval_steps_per_second": 5.446, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.7001983895437041, |
|
"grad_norm": 0.33462783694267273, |
|
"learning_rate": 2.1778974138217168e-05, |
|
"loss": 0.0787, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7001983895437041, |
|
"eval_loss": 0.08074714988470078, |
|
"eval_runtime": 31.9078, |
|
"eval_samples_per_second": 5.453, |
|
"eval_steps_per_second": 5.453, |
|
"step": 1500 |
|
} |
|
], |
|
"logging_steps": 250, |
|
"max_steps": 2142, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 250, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.485342848505856e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|