|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00012, |
|
"loss": 12.7445, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019555555555555556, |
|
"loss": 11.2951, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00018222222222222224, |
|
"loss": 9.9077, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00016888888888888889, |
|
"loss": 8.6537, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00015555555555555556, |
|
"loss": 7.7628, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00014222222222222224, |
|
"loss": 7.0578, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00012888888888888892, |
|
"loss": 6.7923, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00011555555555555555, |
|
"loss": 6.4867, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00010222222222222222, |
|
"loss": 6.4616, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 8.888888888888889e-05, |
|
"loss": 6.278, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 7.555555555555556e-05, |
|
"loss": 6.2509, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 6.222222222222222e-05, |
|
"loss": 6.1527, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.888888888888889e-05, |
|
"loss": 6.1656, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.555555555555556e-05, |
|
"loss": 6.0661, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 6.0176, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 8.88888888888889e-06, |
|
"loss": 6.0408, |
|
"step": 144 |
|
} |
|
], |
|
"logging_steps": 9, |
|
"max_steps": 150, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 281424942858240.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|