|
{ |
|
"best_metric": 0.9908256880733946, |
|
"best_model_checkpoint": "vit-base-patch16-224-dmae-va-U/checkpoint-217", |
|
"epoch": 36.12903225806452, |
|
"eval_steps": 500, |
|
"global_step": 280, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.25688073394495414, |
|
"eval_loss": 1.4318687915802002, |
|
"eval_runtime": 1.4321, |
|
"eval_samples_per_second": 76.112, |
|
"eval_steps_per_second": 2.793, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.1428571428571428e-05, |
|
"loss": 1.3911, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.47706422018348627, |
|
"eval_loss": 1.213340163230896, |
|
"eval_runtime": 1.4871, |
|
"eval_samples_per_second": 73.295, |
|
"eval_steps_per_second": 2.69, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.6055045871559633, |
|
"eval_loss": 0.9487143158912659, |
|
"eval_runtime": 1.5892, |
|
"eval_samples_per_second": 68.589, |
|
"eval_steps_per_second": 2.517, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 1.0766, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7155963302752294, |
|
"eval_loss": 0.6542130708694458, |
|
"eval_runtime": 1.5217, |
|
"eval_samples_per_second": 71.63, |
|
"eval_steps_per_second": 2.629, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 4.841269841269841e-05, |
|
"loss": 0.6974, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_accuracy": 0.8715596330275229, |
|
"eval_loss": 0.4643765389919281, |
|
"eval_runtime": 1.561, |
|
"eval_samples_per_second": 69.827, |
|
"eval_steps_per_second": 2.562, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_accuracy": 0.8715596330275229, |
|
"eval_loss": 0.39188772439956665, |
|
"eval_runtime": 1.5611, |
|
"eval_samples_per_second": 69.823, |
|
"eval_steps_per_second": 2.562, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 4.603174603174603e-05, |
|
"loss": 0.421, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_accuracy": 0.8715596330275229, |
|
"eval_loss": 0.30943310260772705, |
|
"eval_runtime": 1.6488, |
|
"eval_samples_per_second": 66.109, |
|
"eval_steps_per_second": 2.426, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 4.3650793650793655e-05, |
|
"loss": 0.2513, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8990825688073395, |
|
"eval_loss": 0.2334030568599701, |
|
"eval_runtime": 1.6748, |
|
"eval_samples_per_second": 65.082, |
|
"eval_steps_per_second": 2.388, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"eval_accuracy": 0.9174311926605505, |
|
"eval_loss": 0.19148482382297516, |
|
"eval_runtime": 1.68, |
|
"eval_samples_per_second": 64.88, |
|
"eval_steps_per_second": 2.381, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 4.126984126984127e-05, |
|
"loss": 0.1931, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_accuracy": 0.8807339449541285, |
|
"eval_loss": 0.24314457178115845, |
|
"eval_runtime": 1.7464, |
|
"eval_samples_per_second": 62.412, |
|
"eval_steps_per_second": 2.29, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.1757, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"eval_accuracy": 0.944954128440367, |
|
"eval_loss": 0.16078265011310577, |
|
"eval_runtime": 1.6471, |
|
"eval_samples_per_second": 66.177, |
|
"eval_steps_per_second": 2.428, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.926605504587156, |
|
"eval_loss": 0.1423795074224472, |
|
"eval_runtime": 1.7337, |
|
"eval_samples_per_second": 62.87, |
|
"eval_steps_per_second": 2.307, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"learning_rate": 3.650793650793651e-05, |
|
"loss": 0.1442, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"eval_accuracy": 0.944954128440367, |
|
"eval_loss": 0.1280096173286438, |
|
"eval_runtime": 1.6549, |
|
"eval_samples_per_second": 65.864, |
|
"eval_steps_per_second": 2.417, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 3.412698412698413e-05, |
|
"loss": 0.1085, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"eval_accuracy": 0.9541284403669725, |
|
"eval_loss": 0.10545489937067032, |
|
"eval_runtime": 1.6964, |
|
"eval_samples_per_second": 64.255, |
|
"eval_steps_per_second": 2.358, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"eval_accuracy": 0.9541284403669725, |
|
"eval_loss": 0.1080455482006073, |
|
"eval_runtime": 1.5479, |
|
"eval_samples_per_second": 70.417, |
|
"eval_steps_per_second": 2.584, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 3.1746031746031745e-05, |
|
"loss": 0.1056, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.963302752293578, |
|
"eval_loss": 0.09967872500419617, |
|
"eval_runtime": 1.7093, |
|
"eval_samples_per_second": 63.77, |
|
"eval_steps_per_second": 2.34, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"eval_accuracy": 0.963302752293578, |
|
"eval_loss": 0.11852575093507767, |
|
"eval_runtime": 1.5272, |
|
"eval_samples_per_second": 71.372, |
|
"eval_steps_per_second": 2.619, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"learning_rate": 2.9365079365079366e-05, |
|
"loss": 0.0926, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"eval_accuracy": 0.963302752293578, |
|
"eval_loss": 0.07731892913579941, |
|
"eval_runtime": 1.5604, |
|
"eval_samples_per_second": 69.856, |
|
"eval_steps_per_second": 2.564, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 18.58, |
|
"learning_rate": 2.6984126984126984e-05, |
|
"loss": 0.103, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"eval_accuracy": 0.963302752293578, |
|
"eval_loss": 0.12785662710666656, |
|
"eval_runtime": 1.527, |
|
"eval_samples_per_second": 71.384, |
|
"eval_steps_per_second": 2.62, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.963302752293578, |
|
"eval_loss": 0.10433010756969452, |
|
"eval_runtime": 1.5327, |
|
"eval_samples_per_second": 71.115, |
|
"eval_steps_per_second": 2.61, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 20.13, |
|
"learning_rate": 2.4603174603174602e-05, |
|
"loss": 0.0938, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"eval_accuracy": 0.981651376146789, |
|
"eval_loss": 0.08241702616214752, |
|
"eval_runtime": 1.5631, |
|
"eval_samples_per_second": 69.735, |
|
"eval_steps_per_second": 2.559, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 21.68, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0891, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"eval_accuracy": 0.9541284403669725, |
|
"eval_loss": 0.14486828446388245, |
|
"eval_runtime": 1.5236, |
|
"eval_samples_per_second": 71.54, |
|
"eval_steps_per_second": 2.625, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 22.97, |
|
"eval_accuracy": 0.963302752293578, |
|
"eval_loss": 0.13658782839775085, |
|
"eval_runtime": 1.7185, |
|
"eval_samples_per_second": 63.429, |
|
"eval_steps_per_second": 2.328, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"learning_rate": 1.984126984126984e-05, |
|
"loss": 0.0754, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.9357798165137615, |
|
"eval_loss": 0.11480199545621872, |
|
"eval_runtime": 1.6552, |
|
"eval_samples_per_second": 65.853, |
|
"eval_steps_per_second": 2.417, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 24.77, |
|
"learning_rate": 1.746031746031746e-05, |
|
"loss": 0.0882, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 24.9, |
|
"eval_accuracy": 0.9357798165137615, |
|
"eval_loss": 0.19915136694908142, |
|
"eval_runtime": 1.6537, |
|
"eval_samples_per_second": 65.913, |
|
"eval_steps_per_second": 2.419, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"eval_accuracy": 0.981651376146789, |
|
"eval_loss": 0.07425253838300705, |
|
"eval_runtime": 1.4966, |
|
"eval_samples_per_second": 72.83, |
|
"eval_steps_per_second": 2.673, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"learning_rate": 1.5079365079365079e-05, |
|
"loss": 0.078, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"eval_accuracy": 0.9724770642201835, |
|
"eval_loss": 0.06684111058712006, |
|
"eval_runtime": 1.5659, |
|
"eval_samples_per_second": 69.61, |
|
"eval_steps_per_second": 2.554, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 27.87, |
|
"learning_rate": 1.2698412698412699e-05, |
|
"loss": 0.0666, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.9908256880733946, |
|
"eval_loss": 0.053366996347904205, |
|
"eval_runtime": 1.5382, |
|
"eval_samples_per_second": 70.862, |
|
"eval_steps_per_second": 2.6, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 28.9, |
|
"eval_accuracy": 0.9908256880733946, |
|
"eval_loss": 0.049850545823574066, |
|
"eval_runtime": 1.5346, |
|
"eval_samples_per_second": 71.027, |
|
"eval_steps_per_second": 2.607, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 29.42, |
|
"learning_rate": 1.0317460317460318e-05, |
|
"loss": 0.0514, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 29.94, |
|
"eval_accuracy": 0.9724770642201835, |
|
"eval_loss": 0.043298669159412384, |
|
"eval_runtime": 1.5329, |
|
"eval_samples_per_second": 71.108, |
|
"eval_steps_per_second": 2.609, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.062, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"eval_accuracy": 0.963302752293578, |
|
"eval_loss": 0.08402539044618607, |
|
"eval_runtime": 1.7473, |
|
"eval_samples_per_second": 62.382, |
|
"eval_steps_per_second": 2.289, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.9724770642201835, |
|
"eval_loss": 0.05133233219385147, |
|
"eval_runtime": 1.6441, |
|
"eval_samples_per_second": 66.297, |
|
"eval_steps_per_second": 2.433, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 32.52, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0712, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 32.9, |
|
"eval_accuracy": 0.981651376146789, |
|
"eval_loss": 0.04824218526482582, |
|
"eval_runtime": 1.6796, |
|
"eval_samples_per_second": 64.896, |
|
"eval_steps_per_second": 2.382, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 33.94, |
|
"eval_accuracy": 0.981651376146789, |
|
"eval_loss": 0.055325187742710114, |
|
"eval_runtime": 1.5821, |
|
"eval_samples_per_second": 68.894, |
|
"eval_steps_per_second": 2.528, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 34.06, |
|
"learning_rate": 3.1746031746031746e-06, |
|
"loss": 0.0703, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 34.97, |
|
"eval_accuracy": 0.9724770642201835, |
|
"eval_loss": 0.060162752866744995, |
|
"eval_runtime": 1.5164, |
|
"eval_samples_per_second": 71.882, |
|
"eval_steps_per_second": 2.638, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 35.61, |
|
"learning_rate": 7.936507936507937e-07, |
|
"loss": 0.0553, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.9724770642201835, |
|
"eval_loss": 0.05953967571258545, |
|
"eval_runtime": 1.5574, |
|
"eval_samples_per_second": 69.99, |
|
"eval_steps_per_second": 2.568, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 36.13, |
|
"eval_accuracy": 0.9724770642201835, |
|
"eval_loss": 0.05953451991081238, |
|
"eval_runtime": 1.6854, |
|
"eval_samples_per_second": 64.671, |
|
"eval_steps_per_second": 2.373, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 36.13, |
|
"step": 280, |
|
"total_flos": 2.738306029282984e+18, |
|
"train_loss": 0.23923614642449786, |
|
"train_runtime": 1562.4778, |
|
"train_samples_per_second": 25.037, |
|
"train_steps_per_second": 0.179 |
|
} |
|
], |
|
"logging_steps": 12, |
|
"max_steps": 280, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"total_flos": 2.738306029282984e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|