|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.0, |
|
"global_step": 9564, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.7386030949393564e-05, |
|
"loss": 1.4207, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.9008888888888889, |
|
"eval_loss": 0.7066789269447327, |
|
"eval_runtime": 2.6965, |
|
"eval_samples_per_second": 3337.621, |
|
"eval_steps_per_second": 417.203, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.477206189878712e-05, |
|
"loss": 0.5086, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.9516666666666667, |
|
"eval_loss": 0.3055577874183655, |
|
"eval_runtime": 2.6576, |
|
"eval_samples_per_second": 3386.509, |
|
"eval_steps_per_second": 423.314, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.215809284818068e-05, |
|
"loss": 0.2731, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.9648888888888889, |
|
"eval_loss": 0.18555375933647156, |
|
"eval_runtime": 2.6597, |
|
"eval_samples_per_second": 3383.793, |
|
"eval_steps_per_second": 422.974, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.954412379757424e-05, |
|
"loss": 0.1976, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_accuracy": 0.9701111111111111, |
|
"eval_loss": 0.14159560203552246, |
|
"eval_runtime": 2.715, |
|
"eval_samples_per_second": 3314.86, |
|
"eval_steps_per_second": 414.357, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 3.69301547469678e-05, |
|
"loss": 0.1565, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_accuracy": 0.9738888888888889, |
|
"eval_loss": 0.11081045866012573, |
|
"eval_runtime": 2.6963, |
|
"eval_samples_per_second": 3337.905, |
|
"eval_steps_per_second": 417.238, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.431618569636136e-05, |
|
"loss": 0.128, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_accuracy": 0.976, |
|
"eval_loss": 0.09747562557458878, |
|
"eval_runtime": 2.6961, |
|
"eval_samples_per_second": 3338.209, |
|
"eval_steps_per_second": 417.276, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.170221664575492e-05, |
|
"loss": 0.1133, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.9788888888888889, |
|
"eval_loss": 0.08474569022655487, |
|
"eval_runtime": 2.7245, |
|
"eval_samples_per_second": 3303.375, |
|
"eval_steps_per_second": 412.922, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 2.9088247595148475e-05, |
|
"loss": 0.1031, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_accuracy": 0.9804444444444445, |
|
"eval_loss": 0.07724875211715698, |
|
"eval_runtime": 2.6363, |
|
"eval_samples_per_second": 3413.847, |
|
"eval_steps_per_second": 426.731, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.6474278544542037e-05, |
|
"loss": 0.09, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_accuracy": 0.9818888888888889, |
|
"eval_loss": 0.0697416290640831, |
|
"eval_runtime": 2.6295, |
|
"eval_samples_per_second": 3422.689, |
|
"eval_steps_per_second": 427.836, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 2.386030949393559e-05, |
|
"loss": 0.0871, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_accuracy": 0.9815555555555555, |
|
"eval_loss": 0.066066212952137, |
|
"eval_runtime": 2.6946, |
|
"eval_samples_per_second": 3340.06, |
|
"eval_steps_per_second": 417.507, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 2.1246340443329153e-05, |
|
"loss": 0.0733, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_accuracy": 0.9822222222222222, |
|
"eval_loss": 0.06342040002346039, |
|
"eval_runtime": 2.6897, |
|
"eval_samples_per_second": 3346.09, |
|
"eval_steps_per_second": 418.261, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 1.863237139272271e-05, |
|
"loss": 0.0761, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"eval_accuracy": 0.983, |
|
"eval_loss": 0.06072380393743515, |
|
"eval_runtime": 2.6938, |
|
"eval_samples_per_second": 3340.98, |
|
"eval_steps_per_second": 417.623, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 1.601840234211627e-05, |
|
"loss": 0.0739, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_accuracy": 0.9832222222222222, |
|
"eval_loss": 0.05795769765973091, |
|
"eval_runtime": 2.6767, |
|
"eval_samples_per_second": 3362.391, |
|
"eval_steps_per_second": 420.299, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 1.340443329150983e-05, |
|
"loss": 0.0643, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"eval_accuracy": 0.9844444444444445, |
|
"eval_loss": 0.05685265362262726, |
|
"eval_runtime": 2.6876, |
|
"eval_samples_per_second": 3348.672, |
|
"eval_steps_per_second": 418.584, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 1.0790464240903388e-05, |
|
"loss": 0.0678, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"eval_accuracy": 0.984, |
|
"eval_loss": 0.05617769435048103, |
|
"eval_runtime": 2.6484, |
|
"eval_samples_per_second": 3398.278, |
|
"eval_steps_per_second": 424.785, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 8.176495190296946e-06, |
|
"loss": 0.0617, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_accuracy": 0.9853333333333333, |
|
"eval_loss": 0.053985536098480225, |
|
"eval_runtime": 2.672, |
|
"eval_samples_per_second": 3368.244, |
|
"eval_steps_per_second": 421.03, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 5.562526139690506e-06, |
|
"loss": 0.0571, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"eval_accuracy": 0.9847777777777778, |
|
"eval_loss": 0.05352585390210152, |
|
"eval_runtime": 2.7082, |
|
"eval_samples_per_second": 3323.274, |
|
"eval_steps_per_second": 415.409, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 2.9485570890840656e-06, |
|
"loss": 0.0608, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"eval_accuracy": 0.9851111111111112, |
|
"eval_loss": 0.053133774548769, |
|
"eval_runtime": 2.6753, |
|
"eval_samples_per_second": 3364.134, |
|
"eval_steps_per_second": 420.517, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 3.345880384776244e-07, |
|
"loss": 0.0571, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_accuracy": 0.9847777777777778, |
|
"eval_loss": 0.05344167724251747, |
|
"eval_runtime": 2.6425, |
|
"eval_samples_per_second": 3405.863, |
|
"eval_steps_per_second": 425.733, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 9564, |
|
"total_flos": 264960533376000.0, |
|
"train_loss": 0.1922683648263396, |
|
"train_runtime": 134.4457, |
|
"train_samples_per_second": 2276.012, |
|
"train_steps_per_second": 71.137 |
|
} |
|
], |
|
"max_steps": 9564, |
|
"num_train_epochs": 6, |
|
"total_flos": 264960533376000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|