|
{ |
|
"best_metric": 0.8571428571428571, |
|
"best_model_checkpoint": "distillBEiT/checkpoints/checkpoint-1953", |
|
"epoch": 31.0, |
|
"eval_steps": 500, |
|
"global_step": 1953, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9126984126984125e-05, |
|
"loss": 6.699, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.14285714285714285, |
|
"eval_loss": 11.804450035095215, |
|
"eval_runtime": 2.7027, |
|
"eval_samples_per_second": 20.72, |
|
"eval_steps_per_second": 2.59, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.812698412698413e-05, |
|
"loss": 4.176, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.44642857142857145, |
|
"eval_loss": 8.514544486999512, |
|
"eval_runtime": 2.7591, |
|
"eval_samples_per_second": 20.297, |
|
"eval_steps_per_second": 2.537, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.712698412698413e-05, |
|
"loss": 2.6846, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.125, |
|
"eval_loss": 26.979318618774414, |
|
"eval_runtime": 2.7027, |
|
"eval_samples_per_second": 20.72, |
|
"eval_steps_per_second": 2.59, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.612698412698413e-05, |
|
"loss": 2.2263, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.08928571428571429, |
|
"eval_loss": 33.445133209228516, |
|
"eval_runtime": 2.6746, |
|
"eval_samples_per_second": 20.937, |
|
"eval_steps_per_second": 2.617, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.512698412698413e-05, |
|
"loss": 1.8895, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6428571428571429, |
|
"eval_loss": 8.057156562805176, |
|
"eval_runtime": 2.7714, |
|
"eval_samples_per_second": 20.207, |
|
"eval_steps_per_second": 2.526, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.4126984126984126e-05, |
|
"loss": 1.525, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.4107142857142857, |
|
"eval_loss": 8.990975379943848, |
|
"eval_runtime": 2.745, |
|
"eval_samples_per_second": 20.4, |
|
"eval_steps_per_second": 2.55, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.312698412698413e-05, |
|
"loss": 1.4566, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5714285714285714, |
|
"eval_loss": 6.924566745758057, |
|
"eval_runtime": 2.7577, |
|
"eval_samples_per_second": 20.307, |
|
"eval_steps_per_second": 2.538, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.212698412698413e-05, |
|
"loss": 1.3691, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6428571428571429, |
|
"eval_loss": 11.207305908203125, |
|
"eval_runtime": 2.6969, |
|
"eval_samples_per_second": 20.764, |
|
"eval_steps_per_second": 2.596, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.112698412698413e-05, |
|
"loss": 1.168, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7142857142857143, |
|
"eval_loss": 6.766927242279053, |
|
"eval_runtime": 2.8201, |
|
"eval_samples_per_second": 19.857, |
|
"eval_steps_per_second": 2.482, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.012698412698413e-05, |
|
"loss": 1.1166, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7857142857142857, |
|
"eval_loss": 4.691386699676514, |
|
"eval_runtime": 2.7695, |
|
"eval_samples_per_second": 20.22, |
|
"eval_steps_per_second": 2.528, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.9126984126984126e-05, |
|
"loss": 1.0649, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7678571428571429, |
|
"eval_loss": 5.042652606964111, |
|
"eval_runtime": 2.6881, |
|
"eval_samples_per_second": 20.833, |
|
"eval_steps_per_second": 2.604, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.812698412698413e-05, |
|
"loss": 0.97, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7142857142857143, |
|
"eval_loss": 6.7766571044921875, |
|
"eval_runtime": 2.6425, |
|
"eval_samples_per_second": 21.192, |
|
"eval_steps_per_second": 2.649, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.7126984126984125e-05, |
|
"loss": 1.0896, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8035714285714286, |
|
"eval_loss": 11.280374526977539, |
|
"eval_runtime": 2.7964, |
|
"eval_samples_per_second": 20.026, |
|
"eval_steps_per_second": 2.503, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.6126984126984135e-05, |
|
"loss": 0.9401, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6607142857142857, |
|
"eval_loss": 11.328347206115723, |
|
"eval_runtime": 2.8029, |
|
"eval_samples_per_second": 19.979, |
|
"eval_steps_per_second": 2.497, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 3.512698412698413e-05, |
|
"loss": 0.9331, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6964285714285714, |
|
"eval_loss": 12.213973999023438, |
|
"eval_runtime": 2.6558, |
|
"eval_samples_per_second": 21.086, |
|
"eval_steps_per_second": 2.636, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 3.412698412698413e-05, |
|
"loss": 0.8237, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7321428571428571, |
|
"eval_loss": 7.346871852874756, |
|
"eval_runtime": 2.7127, |
|
"eval_samples_per_second": 20.644, |
|
"eval_steps_per_second": 2.58, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 3.312698412698413e-05, |
|
"loss": 0.7515, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7857142857142857, |
|
"eval_loss": 4.754016399383545, |
|
"eval_runtime": 2.7003, |
|
"eval_samples_per_second": 20.738, |
|
"eval_steps_per_second": 2.592, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 3.2126984126984126e-05, |
|
"loss": 0.7622, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7857142857142857, |
|
"eval_loss": 4.091554641723633, |
|
"eval_runtime": 2.7272, |
|
"eval_samples_per_second": 20.534, |
|
"eval_steps_per_second": 2.567, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 3.112698412698413e-05, |
|
"loss": 0.853, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.16071428571428573, |
|
"eval_loss": 12.633813858032227, |
|
"eval_runtime": 2.6777, |
|
"eval_samples_per_second": 20.914, |
|
"eval_steps_per_second": 2.614, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 3.0126984126984124e-05, |
|
"loss": 0.6455, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7678571428571429, |
|
"eval_loss": 9.073665618896484, |
|
"eval_runtime": 2.7224, |
|
"eval_samples_per_second": 20.57, |
|
"eval_steps_per_second": 2.571, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 2.912698412698413e-05, |
|
"loss": 0.6667, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7321428571428571, |
|
"eval_loss": 4.806458950042725, |
|
"eval_runtime": 2.6589, |
|
"eval_samples_per_second": 21.062, |
|
"eval_steps_per_second": 2.633, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 2.812698412698413e-05, |
|
"loss": 0.689, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.48214285714285715, |
|
"eval_loss": 9.992805480957031, |
|
"eval_runtime": 2.6988, |
|
"eval_samples_per_second": 20.75, |
|
"eval_steps_per_second": 2.594, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 2.712698412698413e-05, |
|
"loss": 0.6914, |
|
"step": 1449 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.5357142857142857, |
|
"eval_loss": 18.975879669189453, |
|
"eval_runtime": 2.6271, |
|
"eval_samples_per_second": 21.316, |
|
"eval_steps_per_second": 2.665, |
|
"step": 1449 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 2.612698412698413e-05, |
|
"loss": 0.677, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.125, |
|
"eval_loss": 15.608841896057129, |
|
"eval_runtime": 2.674, |
|
"eval_samples_per_second": 20.942, |
|
"eval_steps_per_second": 2.618, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 2.5126984126984128e-05, |
|
"loss": 0.6575, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.625, |
|
"eval_loss": 13.633025169372559, |
|
"eval_runtime": 2.6744, |
|
"eval_samples_per_second": 20.94, |
|
"eval_steps_per_second": 2.617, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 2.4126984126984128e-05, |
|
"loss": 0.5564, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.375, |
|
"eval_loss": 9.142752647399902, |
|
"eval_runtime": 2.7086, |
|
"eval_samples_per_second": 20.675, |
|
"eval_steps_per_second": 2.584, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 2.3126984126984127e-05, |
|
"loss": 0.7167, |
|
"step": 1701 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 12.48905086517334, |
|
"eval_runtime": 2.6896, |
|
"eval_samples_per_second": 20.821, |
|
"eval_steps_per_second": 2.603, |
|
"step": 1701 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 2.212698412698413e-05, |
|
"loss": 0.5167, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7857142857142857, |
|
"eval_loss": 6.086201190948486, |
|
"eval_runtime": 2.6432, |
|
"eval_samples_per_second": 21.186, |
|
"eval_steps_per_second": 2.648, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 2.112698412698413e-05, |
|
"loss": 0.5433, |
|
"step": 1827 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.375, |
|
"eval_loss": 10.653279304504395, |
|
"eval_runtime": 2.7065, |
|
"eval_samples_per_second": 20.691, |
|
"eval_steps_per_second": 2.586, |
|
"step": 1827 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 2.012698412698413e-05, |
|
"loss": 0.4732, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7321428571428571, |
|
"eval_loss": 5.170372009277344, |
|
"eval_runtime": 2.6871, |
|
"eval_samples_per_second": 20.841, |
|
"eval_steps_per_second": 2.605, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 1.9126984126984128e-05, |
|
"loss": 0.4945, |
|
"step": 1953 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_loss": 3.8952624797821045, |
|
"eval_runtime": 2.7235, |
|
"eval_samples_per_second": 20.562, |
|
"eval_steps_per_second": 2.57, |
|
"step": 1953 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3150, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 3.126627671021568e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|