|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"global_step": 5600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9125000238418579, |
|
"eval_loss": 0.2230108231306076, |
|
"eval_runtime": 5.588, |
|
"eval_samples_per_second": 200.429, |
|
"eval_steps_per_second": 25.054, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.131486410954287e-05, |
|
"loss": 0.3039, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9071428775787354, |
|
"eval_loss": 0.26908189058303833, |
|
"eval_runtime": 5.6261, |
|
"eval_samples_per_second": 199.073, |
|
"eval_steps_per_second": 24.884, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9160714149475098, |
|
"eval_loss": 0.28345081210136414, |
|
"eval_runtime": 5.3742, |
|
"eval_samples_per_second": 208.405, |
|
"eval_steps_per_second": 26.051, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 1.9225171549783764e-05, |
|
"loss": 0.104, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9241071343421936, |
|
"eval_loss": 0.39826202392578125, |
|
"eval_runtime": 5.6203, |
|
"eval_samples_per_second": 199.277, |
|
"eval_steps_per_second": 24.91, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9178571701049805, |
|
"eval_loss": 0.463891863822937, |
|
"eval_runtime": 5.6398, |
|
"eval_samples_per_second": 198.588, |
|
"eval_steps_per_second": 24.823, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 1.713547899002466e-05, |
|
"loss": 0.0361, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9205357432365417, |
|
"eval_loss": 0.46613049507141113, |
|
"eval_runtime": 5.5849, |
|
"eval_samples_per_second": 200.541, |
|
"eval_steps_per_second": 25.068, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9258928298950195, |
|
"eval_loss": 0.4626975357532501, |
|
"eval_runtime": 5.6519, |
|
"eval_samples_per_second": 198.163, |
|
"eval_steps_per_second": 24.77, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 1.5045786430265557e-05, |
|
"loss": 0.0228, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9258928298950195, |
|
"eval_loss": 0.4725589156150818, |
|
"eval_runtime": 5.5505, |
|
"eval_samples_per_second": 201.783, |
|
"eval_steps_per_second": 25.223, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 1.2956093870506452e-05, |
|
"loss": 0.0158, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.925000011920929, |
|
"eval_loss": 0.5453544855117798, |
|
"eval_runtime": 5.5094, |
|
"eval_samples_per_second": 203.288, |
|
"eval_steps_per_second": 25.411, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9160714149475098, |
|
"eval_loss": 0.5454272627830505, |
|
"eval_runtime": 5.4705, |
|
"eval_samples_per_second": 204.734, |
|
"eval_steps_per_second": 25.592, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 1.0866401310747346e-05, |
|
"loss": 0.0146, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9196428656578064, |
|
"eval_loss": 0.5470275282859802, |
|
"eval_runtime": 5.5797, |
|
"eval_samples_per_second": 200.729, |
|
"eval_steps_per_second": 25.091, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9178571701049805, |
|
"eval_loss": 0.6207221746444702, |
|
"eval_runtime": 5.4687, |
|
"eval_samples_per_second": 204.802, |
|
"eval_steps_per_second": 25.6, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 8.776708750988241e-06, |
|
"loss": 0.0106, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9142857193946838, |
|
"eval_loss": 0.6711627840995789, |
|
"eval_runtime": 5.5881, |
|
"eval_samples_per_second": 200.427, |
|
"eval_steps_per_second": 25.053, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.918749988079071, |
|
"eval_loss": 0.6122187972068787, |
|
"eval_runtime": 5.7115, |
|
"eval_samples_per_second": 196.094, |
|
"eval_steps_per_second": 24.512, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 6.687016191229136e-06, |
|
"loss": 0.0101, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9196428656578064, |
|
"eval_loss": 0.6081933975219727, |
|
"eval_runtime": 5.6756, |
|
"eval_samples_per_second": 197.336, |
|
"eval_steps_per_second": 24.667, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9214285612106323, |
|
"eval_loss": 0.6368692517280579, |
|
"eval_runtime": 5.4708, |
|
"eval_samples_per_second": 204.723, |
|
"eval_steps_per_second": 25.59, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 16.07, |
|
"learning_rate": 4.59732363147003e-06, |
|
"loss": 0.0108, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9205357432365417, |
|
"eval_loss": 0.6168231964111328, |
|
"eval_runtime": 5.8239, |
|
"eval_samples_per_second": 192.312, |
|
"eval_steps_per_second": 24.039, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"learning_rate": 2.507631071710926e-06, |
|
"loss": 0.0069, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9196428656578064, |
|
"eval_loss": 0.6445040106773376, |
|
"eval_runtime": 5.6834, |
|
"eval_samples_per_second": 197.066, |
|
"eval_steps_per_second": 24.633, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9178571701049805, |
|
"eval_loss": 0.6451646089553833, |
|
"eval_runtime": 5.6811, |
|
"eval_samples_per_second": 197.145, |
|
"eval_steps_per_second": 24.643, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 19.64, |
|
"learning_rate": 4.17938511951821e-07, |
|
"loss": 0.0097, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9205357432365417, |
|
"eval_loss": 0.6383888721466064, |
|
"eval_runtime": 5.7353, |
|
"eval_samples_per_second": 195.283, |
|
"eval_steps_per_second": 24.41, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 5600, |
|
"total_flos": 5893687640064000.0, |
|
"train_loss": 0.04879493878356048, |
|
"train_runtime": 2058.2783, |
|
"train_samples_per_second": 43.532, |
|
"train_steps_per_second": 2.721 |
|
} |
|
], |
|
"max_steps": 5600, |
|
"num_train_epochs": 20, |
|
"total_flos": 5893687640064000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|