{
  "best_metric": 0.85,
  "best_model_checkpoint": "videomae-base-finetuned-engine-subset-20230310/checkpoint-372",
  "epoch": 19.018333333333334,
  "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 8.333333333333334e-06,
      "loss": 2.8307,
      "step": 10
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 2.6132,
      "step": 20
    },
    {
      "epoch": 0.05,
      "learning_rate": 2.5e-05,
      "loss": 2.5947,
      "step": 30
    },
    {
      "epoch": 0.05,
      "eval_accuracy": 0.15,
      "eval_loss": 2.538285493850708,
      "eval_runtime": 64.1944,
      "eval_samples_per_second": 1.246,
      "eval_steps_per_second": 0.218,
      "step": 31
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 2.5062,
      "step": 40
    },
    {
      "epoch": 1.03,
      "learning_rate": 4.166666666666667e-05,
      "loss": 2.665,
      "step": 50
    },
    {
      "epoch": 1.05,
      "learning_rate": 5e-05,
      "loss": 2.4195,
      "step": 60
    },
    {
      "epoch": 1.05,
      "eval_accuracy": 0.15,
      "eval_loss": 2.5108141899108887,
      "eval_runtime": 62.8569,
      "eval_samples_per_second": 1.273,
      "eval_steps_per_second": 0.223,
      "step": 62
    },
    {
      "epoch": 2.01,
      "learning_rate": 4.9074074074074075e-05,
      "loss": 2.3539,
      "step": 70
    },
    {
      "epoch": 2.03,
      "learning_rate": 4.814814814814815e-05,
      "loss": 2.3374,
      "step": 80
    },
    {
      "epoch": 2.05,
      "learning_rate": 4.722222222222222e-05,
      "loss": 2.2476,
      "step": 90
    },
    {
      "epoch": 2.05,
      "eval_accuracy": 0.225,
      "eval_loss": 2.0532896518707275,
      "eval_runtime": 62.7732,
      "eval_samples_per_second": 1.274,
      "eval_steps_per_second": 0.223,
      "step": 93
    },
    {
      "epoch": 3.01,
      "learning_rate": 4.62962962962963e-05,
      "loss": 2.2757,
      "step": 100
    },
    {
      "epoch": 3.03,
      "learning_rate": 4.5370370370370374e-05,
      "loss": 1.9768,
      "step": 110
    },
    {
      "epoch": 3.04,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 1.9449,
      "step": 120
    },
    {
      "epoch": 3.05,
      "eval_accuracy": 0.2375,
      "eval_loss": 2.071887493133545,
      "eval_runtime": 65.8713,
      "eval_samples_per_second": 1.214,
      "eval_steps_per_second": 0.213,
      "step": 124
    },
    {
      "epoch": 4.01,
      "learning_rate": 4.351851851851852e-05,
      "loss": 1.7178,
      "step": 130
    },
    {
      "epoch": 4.03,
      "learning_rate": 4.259259259259259e-05,
      "loss": 1.6786,
      "step": 140
    },
    {
      "epoch": 4.04,
      "learning_rate": 4.166666666666667e-05,
      "loss": 1.5724,
      "step": 150
    },
    {
      "epoch": 4.05,
      "eval_accuracy": 0.475,
      "eval_loss": 1.4755998849868774,
      "eval_runtime": 62.826,
      "eval_samples_per_second": 1.273,
      "eval_steps_per_second": 0.223,
      "step": 155
    },
    {
      "epoch": 5.01,
      "learning_rate": 4.074074074074074e-05,
      "loss": 1.3465,
      "step": 160
    },
    {
      "epoch": 5.03,
      "learning_rate": 3.981481481481482e-05,
      "loss": 1.3641,
      "step": 170
    },
    {
      "epoch": 5.04,
      "learning_rate": 3.888888888888889e-05,
      "loss": 1.395,
      "step": 180
    },
    {
      "epoch": 5.05,
      "eval_accuracy": 0.5,
      "eval_loss": 1.2884117364883423,
      "eval_runtime": 63.6279,
      "eval_samples_per_second": 1.257,
      "eval_steps_per_second": 0.22,
      "step": 186
    },
    {
      "epoch": 6.01,
      "learning_rate": 3.7962962962962964e-05,
      "loss": 1.1859,
      "step": 190
    },
    {
      "epoch": 6.02,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 1.2224,
      "step": 200
    },
    {
      "epoch": 6.04,
      "learning_rate": 3.611111111111111e-05,
      "loss": 1.0822,
      "step": 210
    },
    {
      "epoch": 6.05,
      "eval_accuracy": 0.575,
      "eval_loss": 1.0678651332855225,
      "eval_runtime": 63.3472,
      "eval_samples_per_second": 1.263,
      "eval_steps_per_second": 0.221,
      "step": 217
    },
    {
      "epoch": 7.0,
      "learning_rate": 3.518518518518519e-05,
      "loss": 0.9066,
      "step": 220
    },
    {
      "epoch": 7.02,
      "learning_rate": 3.425925925925926e-05,
      "loss": 0.7939,
      "step": 230
    },
    {
      "epoch": 7.04,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 1.0635,
      "step": 240
    },
    {
      "epoch": 7.05,
      "eval_accuracy": 0.7,
      "eval_loss": 0.8040415048599243,
      "eval_runtime": 63.8719,
      "eval_samples_per_second": 1.253,
      "eval_steps_per_second": 0.219,
      "step": 248
    },
    {
      "epoch": 8.0,
      "learning_rate": 3.240740740740741e-05,
      "loss": 1.1727,
      "step": 250
    },
    {
      "epoch": 8.02,
      "learning_rate": 3.148148148148148e-05,
      "loss": 0.8291,
      "step": 260
    },
    {
      "epoch": 8.04,
      "learning_rate": 3.055555555555556e-05,
      "loss": 0.8707,
      "step": 270
    },
    {
      "epoch": 8.05,
      "eval_accuracy": 0.525,
      "eval_loss": 0.9334062337875366,
      "eval_runtime": 64.0172,
      "eval_samples_per_second": 1.25,
      "eval_steps_per_second": 0.219,
      "step": 279
    },
    {
      "epoch": 9.0,
      "learning_rate": 2.962962962962963e-05,
      "loss": 0.8339,
      "step": 280
    },
    {
      "epoch": 9.02,
      "learning_rate": 2.8703703703703706e-05,
      "loss": 0.8118,
      "step": 290
    },
    {
      "epoch": 9.04,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.8068,
      "step": 300
    },
    {
      "epoch": 9.05,
      "learning_rate": 2.6851851851851855e-05,
      "loss": 0.7042,
      "step": 310
    },
    {
      "epoch": 9.05,
      "eval_accuracy": 0.75,
      "eval_loss": 0.6476640701293945,
      "eval_runtime": 63.3755,
      "eval_samples_per_second": 1.262,
      "eval_steps_per_second": 0.221,
      "step": 310
    },
    {
      "epoch": 10.02,
      "learning_rate": 2.5925925925925925e-05,
      "loss": 0.6771,
      "step": 320
    },
    {
      "epoch": 10.03,
      "learning_rate": 2.5e-05,
      "loss": 0.7369,
      "step": 330
    },
    {
      "epoch": 10.05,
      "learning_rate": 2.4074074074074074e-05,
      "loss": 0.6543,
      "step": 340
    },
    {
      "epoch": 10.05,
      "eval_accuracy": 0.7375,
      "eval_loss": 0.6962689757347107,
      "eval_runtime": 63.3385,
      "eval_samples_per_second": 1.263,
      "eval_steps_per_second": 0.221,
      "step": 341
    },
    {
      "epoch": 11.02,
      "learning_rate": 2.314814814814815e-05,
      "loss": 0.7641,
      "step": 350
    },
    {
      "epoch": 11.03,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.5571,
      "step": 360
    },
    {
      "epoch": 11.05,
      "learning_rate": 2.1296296296296296e-05,
      "loss": 0.6807,
      "step": 370
    },
    {
      "epoch": 11.05,
      "eval_accuracy": 0.85,
      "eval_loss": 0.49579018354415894,
      "eval_runtime": 64.8171,
      "eval_samples_per_second": 1.234,
      "eval_steps_per_second": 0.216,
      "step": 372
    },
    {
      "epoch": 12.01,
      "learning_rate": 2.037037037037037e-05,
      "loss": 0.5435,
      "step": 380
    },
    {
      "epoch": 12.03,
      "learning_rate": 1.9444444444444445e-05,
      "loss": 0.5266,
      "step": 390
    },
    {
      "epoch": 12.05,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.4924,
      "step": 400
    },
    {
      "epoch": 12.05,
      "eval_accuracy": 0.775,
      "eval_loss": 0.6373826265335083,
      "eval_runtime": 67.4155,
      "eval_samples_per_second": 1.187,
      "eval_steps_per_second": 0.208,
      "step": 403
    },
    {
      "epoch": 13.01,
      "learning_rate": 1.7592592592592595e-05,
      "loss": 0.4775,
      "step": 410
    },
    {
      "epoch": 13.03,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.5071,
      "step": 420
    },
    {
      "epoch": 13.04,
      "learning_rate": 1.574074074074074e-05,
      "loss": 0.4822,
      "step": 430
    },
    {
      "epoch": 13.05,
      "eval_accuracy": 0.75,
      "eval_loss": 0.6144760847091675,
      "eval_runtime": 64.8207,
      "eval_samples_per_second": 1.234,
      "eval_steps_per_second": 0.216,
      "step": 434
    },
    {
      "epoch": 14.01,
      "learning_rate": 1.4814814814814815e-05,
      "loss": 0.3259,
      "step": 440
    },
    {
      "epoch": 14.03,
      "learning_rate": 1.388888888888889e-05,
      "loss": 0.5054,
      "step": 450
    },
    {
      "epoch": 14.04,
      "learning_rate": 1.2962962962962962e-05,
      "loss": 0.4878,
      "step": 460
    },
    {
      "epoch": 14.05,
      "eval_accuracy": 0.7625,
      "eval_loss": 0.6274302005767822,
      "eval_runtime": 62.4581,
      "eval_samples_per_second": 1.281,
      "eval_steps_per_second": 0.224,
      "step": 465
    },
    {
      "epoch": 15.01,
      "learning_rate": 1.2037037037037037e-05,
      "loss": 0.3902,
      "step": 470
    },
    {
      "epoch": 15.03,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 0.4728,
      "step": 480
    },
    {
      "epoch": 15.04,
      "learning_rate": 1.0185185185185185e-05,
      "loss": 0.4442,
      "step": 490
    },
    {
      "epoch": 15.05,
      "eval_accuracy": 0.85,
      "eval_loss": 0.42305102944374084,
      "eval_runtime": 62.8623,
      "eval_samples_per_second": 1.273,
      "eval_steps_per_second": 0.223,
      "step": 496
    },
    {
      "epoch": 16.01,
      "learning_rate": 9.259259259259259e-06,
      "loss": 0.443,
      "step": 500
    },
    {
      "epoch": 16.02,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.3427,
      "step": 510
    },
    {
      "epoch": 16.04,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 0.2739,
      "step": 520
    },
    {
      "epoch": 16.05,
      "eval_accuracy": 0.85,
      "eval_loss": 0.4999323785305023,
      "eval_runtime": 63.9597,
      "eval_samples_per_second": 1.251,
      "eval_steps_per_second": 0.219,
      "step": 527
    },
    {
      "epoch": 17.0,
      "learning_rate": 6.481481481481481e-06,
      "loss": 0.3676,
      "step": 530
    },
    {
      "epoch": 17.02,
      "learning_rate": 5.555555555555556e-06,
      "loss": 0.5171,
      "step": 540
    },
    {
      "epoch": 17.04,
      "learning_rate": 4.6296296296296296e-06,
      "loss": 0.3514,
      "step": 550
    },
    {
      "epoch": 17.05,
      "eval_accuracy": 0.8375,
      "eval_loss": 0.4638718068599701,
      "eval_runtime": 65.0616,
      "eval_samples_per_second": 1.23,
      "eval_steps_per_second": 0.215,
      "step": 558
    },
    {
      "epoch": 18.0,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 0.2877,
      "step": 560
    },
    {
      "epoch": 18.02,
      "learning_rate": 2.777777777777778e-06,
      "loss": 0.3558,
      "step": 570
    },
    {
      "epoch": 18.04,
      "learning_rate": 1.8518518518518519e-06,
      "loss": 0.4158,
      "step": 580
    },
    {
      "epoch": 18.05,
      "eval_accuracy": 0.85,
      "eval_loss": 0.42912358045578003,
      "eval_runtime": 64.0669,
      "eval_samples_per_second": 1.249,
      "eval_steps_per_second": 0.219,
      "step": 589
    },
    {
      "epoch": 19.0,
      "learning_rate": 9.259259259259259e-07,
      "loss": 0.3158,
      "step": 590
    },
    {
      "epoch": 19.02,
      "learning_rate": 0.0,
      "loss": 0.2689,
      "step": 600
    },
    {
      "epoch": 19.02,
      "eval_accuracy": 0.85,
      "eval_loss": 0.42943257093429565,
      "eval_runtime": 64.3343,
      "eval_samples_per_second": 1.244,
      "eval_steps_per_second": 0.218,
      "step": 600
    },
    {
      "epoch": 19.02,
      "step": 600,
      "total_flos": 4.415279302910214e+18,
      "train_loss": 1.063151851495107,
      "train_runtime": 5413.8579,
      "train_samples_per_second": 0.665,
      "train_steps_per_second": 0.111
    },
    {
      "epoch": 19.02,
      "eval_accuracy": 0.85,
      "eval_loss": 0.49579018354415894,
      "eval_runtime": 66.0758,
      "eval_samples_per_second": 1.211,
      "eval_steps_per_second": 0.212,
      "step": 600
    },
    {
      "epoch": 19.02,
      "eval_accuracy": 0.85,
      "eval_loss": 0.49579015374183655,
      "eval_runtime": 64.7531,
      "eval_samples_per_second": 1.235,
      "eval_steps_per_second": 0.216,
      "step": 600
    }
  ],
  "max_steps": 600,
  "num_train_epochs": 9223372036854775807,
  "total_flos": 4.415279302910214e+18,
  "trial_name": null,
  "trial_params": null
}