|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.997356828193833, |
|
"global_step": 2830, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.293286219081273e-05, |
|
"loss": 1.0928, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.47887325286865234, |
|
"eval_loss": 1.5795212984085083, |
|
"eval_runtime": 38.2173, |
|
"eval_samples_per_second": 14.862, |
|
"eval_steps_per_second": 7.431, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 8.593639575971731e-05, |
|
"loss": 0.7645, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_accuracy": 0.7869718074798584, |
|
"eval_loss": 0.742794930934906, |
|
"eval_runtime": 37.531, |
|
"eval_samples_per_second": 15.134, |
|
"eval_steps_per_second": 7.567, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 7.886925795053004e-05, |
|
"loss": 0.7006, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_accuracy": 0.8362675905227661, |
|
"eval_loss": 0.6150082349777222, |
|
"eval_runtime": 40.2645, |
|
"eval_samples_per_second": 14.107, |
|
"eval_steps_per_second": 7.053, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 7.180212014134276e-05, |
|
"loss": 0.5091, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_accuracy": 0.8309859037399292, |
|
"eval_loss": 0.6766383051872253, |
|
"eval_runtime": 37.4365, |
|
"eval_samples_per_second": 15.172, |
|
"eval_steps_per_second": 7.586, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 6.473498233215549e-05, |
|
"loss": 0.4137, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"eval_accuracy": 0.8644366264343262, |
|
"eval_loss": 0.6185892820358276, |
|
"eval_runtime": 40.2147, |
|
"eval_samples_per_second": 14.124, |
|
"eval_steps_per_second": 7.062, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 5.7667844522968195e-05, |
|
"loss": 0.3657, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_accuracy": 0.8978873491287231, |
|
"eval_loss": 0.5504735112190247, |
|
"eval_runtime": 37.4129, |
|
"eval_samples_per_second": 15.182, |
|
"eval_steps_per_second": 7.591, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 5.0636042402826856e-05, |
|
"loss": 0.3252, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_accuracy": 0.8838028311729431, |
|
"eval_loss": 0.5585792660713196, |
|
"eval_runtime": 39.9534, |
|
"eval_samples_per_second": 14.217, |
|
"eval_steps_per_second": 7.108, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 4.356890459363958e-05, |
|
"loss": 0.2616, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"eval_accuracy": 0.8820422291755676, |
|
"eval_loss": 0.5848411321640015, |
|
"eval_runtime": 37.6048, |
|
"eval_samples_per_second": 15.104, |
|
"eval_steps_per_second": 7.552, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 3.6501766784452293e-05, |
|
"loss": 0.2373, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"eval_accuracy": 0.8978873491287231, |
|
"eval_loss": 0.596436619758606, |
|
"eval_runtime": 38.595, |
|
"eval_samples_per_second": 14.717, |
|
"eval_steps_per_second": 7.358, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 2.9434628975265022e-05, |
|
"loss": 0.1699, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"eval_accuracy": 0.9031690359115601, |
|
"eval_loss": 0.5169617533683777, |
|
"eval_runtime": 40.3197, |
|
"eval_samples_per_second": 14.087, |
|
"eval_steps_per_second": 7.044, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 2.236749116607774e-05, |
|
"loss": 0.1467, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"eval_accuracy": 0.9066901206970215, |
|
"eval_loss": 0.5568466186523438, |
|
"eval_runtime": 37.5356, |
|
"eval_samples_per_second": 15.132, |
|
"eval_steps_per_second": 7.566, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 1.530035335689046e-05, |
|
"loss": 0.0975, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"eval_accuracy": 0.9049295783042908, |
|
"eval_loss": 0.5153928995132446, |
|
"eval_runtime": 40.2253, |
|
"eval_samples_per_second": 14.12, |
|
"eval_steps_per_second": 7.06, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 8.233215547703181e-06, |
|
"loss": 0.0589, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"eval_accuracy": 0.922535240650177, |
|
"eval_loss": 0.48342233896255493, |
|
"eval_runtime": 37.4958, |
|
"eval_samples_per_second": 15.148, |
|
"eval_steps_per_second": 7.574, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 1.1660777385159012e-06, |
|
"loss": 0.0775, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"eval_accuracy": 0.922535240650177, |
|
"eval_loss": 0.47951266169548035, |
|
"eval_runtime": 40.6487, |
|
"eval_samples_per_second": 13.973, |
|
"eval_steps_per_second": 6.987, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2830, |
|
"total_flos": 1.204556515540952e+18, |
|
"train_loss": 0.36904500712354277, |
|
"train_runtime": 3573.2389, |
|
"train_samples_per_second": 6.353, |
|
"train_steps_per_second": 0.792 |
|
} |
|
], |
|
"max_steps": 2830, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.204556515540952e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|