|
{ |
|
"best_metric": 0.28330111503601074, |
|
"best_model_checkpoint": "/content/best_model/checkpoint-65000", |
|
"epoch": 2.0, |
|
"global_step": 69122, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.638320650444142e-05, |
|
"loss": 0.3604, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.8821028470993042, |
|
"eval_loss": 0.3162487745285034, |
|
"eval_runtime": 142.0782, |
|
"eval_samples_per_second": 203.634, |
|
"eval_steps_per_second": 3.188, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.276641300888285e-05, |
|
"loss": 0.3326, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.8842803835868835, |
|
"eval_loss": 0.311200350522995, |
|
"eval_runtime": 142.1706, |
|
"eval_samples_per_second": 203.502, |
|
"eval_steps_per_second": 3.186, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.914961951332427e-05, |
|
"loss": 0.3293, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.8869763612747192, |
|
"eval_loss": 0.3043750524520874, |
|
"eval_runtime": 142.2145, |
|
"eval_samples_per_second": 203.439, |
|
"eval_steps_per_second": 3.185, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.5532826017765694e-05, |
|
"loss": 0.3246, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.8871491551399231, |
|
"eval_loss": 0.30401167273521423, |
|
"eval_runtime": 142.2635, |
|
"eval_samples_per_second": 203.369, |
|
"eval_steps_per_second": 3.184, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.191603252220711e-05, |
|
"loss": 0.32, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.8887736797332764, |
|
"eval_loss": 0.29694026708602905, |
|
"eval_runtime": 142.2955, |
|
"eval_samples_per_second": 203.323, |
|
"eval_steps_per_second": 3.184, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.829923902664854e-05, |
|
"loss": 0.3143, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.8903290629386902, |
|
"eval_loss": 0.2928813695907593, |
|
"eval_runtime": 142.3659, |
|
"eval_samples_per_second": 203.223, |
|
"eval_steps_per_second": 3.182, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.468244553108996e-05, |
|
"loss": 0.3095, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_accuracy": 0.8899142742156982, |
|
"eval_loss": 0.29173970222473145, |
|
"eval_runtime": 142.5262, |
|
"eval_samples_per_second": 202.994, |
|
"eval_steps_per_second": 3.178, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.106565203553138e-05, |
|
"loss": 0.2844, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_accuracy": 0.8886008858680725, |
|
"eval_loss": 0.29569417238235474, |
|
"eval_runtime": 142.5524, |
|
"eval_samples_per_second": 202.957, |
|
"eval_steps_per_second": 3.178, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.7448858539972804e-05, |
|
"loss": 0.2778, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_accuracy": 0.890640139579773, |
|
"eval_loss": 0.2942551076412201, |
|
"eval_runtime": 142.6301, |
|
"eval_samples_per_second": 202.846, |
|
"eval_steps_per_second": 3.176, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.3832065044414225e-05, |
|
"loss": 0.2779, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_accuracy": 0.8934743404388428, |
|
"eval_loss": 0.28896576166152954, |
|
"eval_runtime": 142.7675, |
|
"eval_samples_per_second": 202.651, |
|
"eval_steps_per_second": 3.173, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.0215271548855646e-05, |
|
"loss": 0.2752, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_accuracy": 0.891884446144104, |
|
"eval_loss": 0.28808724880218506, |
|
"eval_runtime": 142.5778, |
|
"eval_samples_per_second": 202.921, |
|
"eval_steps_per_second": 3.177, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 6.598478053297069e-06, |
|
"loss": 0.2736, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_accuracy": 0.8943729996681213, |
|
"eval_loss": 0.28354716300964355, |
|
"eval_runtime": 142.8817, |
|
"eval_samples_per_second": 202.489, |
|
"eval_steps_per_second": 3.17, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.9816845577384916e-06, |
|
"loss": 0.2725, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_accuracy": 0.8941656351089478, |
|
"eval_loss": 0.28330111503601074, |
|
"eval_runtime": 142.8534, |
|
"eval_samples_per_second": 202.529, |
|
"eval_steps_per_second": 3.171, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 69122, |
|
"total_flos": 3.410043502198626e+17, |
|
"train_loss": 0.30218412260715205, |
|
"train_runtime": 63905.8286, |
|
"train_samples_per_second": 69.223, |
|
"train_steps_per_second": 1.082 |
|
} |
|
], |
|
"max_steps": 69122, |
|
"num_train_epochs": 2, |
|
"total_flos": 3.410043502198626e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|