|
{ |
|
"best_metric": 2.206695079803467, |
|
"best_model_checkpoint": "detr-r50-finetuned-mist1-gb-4ah-6l/checkpoint-5060", |
|
"epoch": 50.0, |
|
"eval_steps": 500, |
|
"global_step": 5750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.812173913043479e-06, |
|
"loss": 3.4044, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 3.004835844039917, |
|
"eval_runtime": 5.8385, |
|
"eval_samples_per_second": 6.851, |
|
"eval_steps_per_second": 0.856, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.612173913043479e-06, |
|
"loss": 3.1708, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.902806520462036, |
|
"eval_runtime": 5.7867, |
|
"eval_samples_per_second": 6.912, |
|
"eval_steps_per_second": 0.864, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 9.412173913043479e-06, |
|
"loss": 3.0756, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.853818416595459, |
|
"eval_runtime": 5.8759, |
|
"eval_samples_per_second": 6.808, |
|
"eval_steps_per_second": 0.851, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 9.21217391304348e-06, |
|
"loss": 2.9769, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.814894914627075, |
|
"eval_runtime": 5.8445, |
|
"eval_samples_per_second": 6.844, |
|
"eval_steps_per_second": 0.855, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 9.01217391304348e-06, |
|
"loss": 2.8999, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 2.7331762313842773, |
|
"eval_runtime": 5.9217, |
|
"eval_samples_per_second": 6.755, |
|
"eval_steps_per_second": 0.844, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 8.81217391304348e-06, |
|
"loss": 2.8609, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.7212414741516113, |
|
"eval_runtime": 5.879, |
|
"eval_samples_per_second": 6.804, |
|
"eval_steps_per_second": 0.85, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 8.613913043478262e-06, |
|
"loss": 2.8338, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 2.689382553100586, |
|
"eval_runtime": 5.8299, |
|
"eval_samples_per_second": 6.861, |
|
"eval_steps_per_second": 0.858, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 8.413913043478262e-06, |
|
"loss": 2.8103, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 2.704505681991577, |
|
"eval_runtime": 5.7873, |
|
"eval_samples_per_second": 6.912, |
|
"eval_steps_per_second": 0.864, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 8.21391304347826e-06, |
|
"loss": 2.8036, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 2.778614044189453, |
|
"eval_runtime": 5.7444, |
|
"eval_samples_per_second": 6.963, |
|
"eval_steps_per_second": 0.87, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 8.013913043478262e-06, |
|
"loss": 2.7486, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 2.688080310821533, |
|
"eval_runtime": 5.7616, |
|
"eval_samples_per_second": 6.943, |
|
"eval_steps_per_second": 0.868, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 7.813913043478263e-06, |
|
"loss": 2.7076, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 2.605870485305786, |
|
"eval_runtime": 5.8415, |
|
"eval_samples_per_second": 6.848, |
|
"eval_steps_per_second": 0.856, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 7.615652173913044e-06, |
|
"loss": 2.7156, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 2.648322343826294, |
|
"eval_runtime": 5.8314, |
|
"eval_samples_per_second": 6.859, |
|
"eval_steps_per_second": 0.857, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 7.415652173913044e-06, |
|
"loss": 2.6655, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 2.5438005924224854, |
|
"eval_runtime": 5.7667, |
|
"eval_samples_per_second": 6.936, |
|
"eval_steps_per_second": 0.867, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 7.215652173913043e-06, |
|
"loss": 2.6368, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 2.5342297554016113, |
|
"eval_runtime": 5.7673, |
|
"eval_samples_per_second": 6.936, |
|
"eval_steps_per_second": 0.867, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 7.015652173913044e-06, |
|
"loss": 2.5982, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 2.5287182331085205, |
|
"eval_runtime": 5.8321, |
|
"eval_samples_per_second": 6.859, |
|
"eval_steps_per_second": 0.857, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 6.815652173913045e-06, |
|
"loss": 2.6116, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 2.4446094036102295, |
|
"eval_runtime": 5.7529, |
|
"eval_samples_per_second": 6.953, |
|
"eval_steps_per_second": 0.869, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 6.615652173913044e-06, |
|
"loss": 2.5592, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 2.4365103244781494, |
|
"eval_runtime": 5.7499, |
|
"eval_samples_per_second": 6.957, |
|
"eval_steps_per_second": 0.87, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 6.415652173913044e-06, |
|
"loss": 2.5528, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 2.484395980834961, |
|
"eval_runtime": 5.7801, |
|
"eval_samples_per_second": 6.92, |
|
"eval_steps_per_second": 0.865, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 6.215652173913044e-06, |
|
"loss": 2.5248, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 2.4194891452789307, |
|
"eval_runtime": 5.7656, |
|
"eval_samples_per_second": 6.938, |
|
"eval_steps_per_second": 0.867, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 6.015652173913044e-06, |
|
"loss": 2.4853, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 2.453843832015991, |
|
"eval_runtime": 5.8034, |
|
"eval_samples_per_second": 6.893, |
|
"eval_steps_per_second": 0.862, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 5.815652173913045e-06, |
|
"loss": 2.5295, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 2.569608211517334, |
|
"eval_runtime": 5.8097, |
|
"eval_samples_per_second": 6.885, |
|
"eval_steps_per_second": 0.861, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 5.615652173913044e-06, |
|
"loss": 2.5069, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 2.4536538124084473, |
|
"eval_runtime": 5.7892, |
|
"eval_samples_per_second": 6.909, |
|
"eval_steps_per_second": 0.864, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 5.4156521739130445e-06, |
|
"loss": 2.4504, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 2.515150547027588, |
|
"eval_runtime": 5.8674, |
|
"eval_samples_per_second": 6.817, |
|
"eval_steps_per_second": 0.852, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 5.215652173913044e-06, |
|
"loss": 2.4447, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 2.4431958198547363, |
|
"eval_runtime": 5.9181, |
|
"eval_samples_per_second": 6.759, |
|
"eval_steps_per_second": 0.845, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 5.015652173913044e-06, |
|
"loss": 2.4303, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 2.4032533168792725, |
|
"eval_runtime": 5.9198, |
|
"eval_samples_per_second": 6.757, |
|
"eval_steps_per_second": 0.845, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 4.815652173913044e-06, |
|
"loss": 2.4137, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 2.3795552253723145, |
|
"eval_runtime": 5.8418, |
|
"eval_samples_per_second": 6.847, |
|
"eval_steps_per_second": 0.856, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 4.615652173913044e-06, |
|
"loss": 2.41, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 2.3598742485046387, |
|
"eval_runtime": 5.8868, |
|
"eval_samples_per_second": 6.795, |
|
"eval_steps_per_second": 0.849, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 4.415652173913044e-06, |
|
"loss": 2.3816, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 2.401759386062622, |
|
"eval_runtime": 5.8373, |
|
"eval_samples_per_second": 6.852, |
|
"eval_steps_per_second": 0.857, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 4.215652173913044e-06, |
|
"loss": 2.3752, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 2.3115689754486084, |
|
"eval_runtime": 5.8962, |
|
"eval_samples_per_second": 6.784, |
|
"eval_steps_per_second": 0.848, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 4.0156521739130435e-06, |
|
"loss": 2.3929, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 2.310469150543213, |
|
"eval_runtime": 5.8122, |
|
"eval_samples_per_second": 6.882, |
|
"eval_steps_per_second": 0.86, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 3.815652173913044e-06, |
|
"loss": 2.3791, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 2.367689371109009, |
|
"eval_runtime": 5.837, |
|
"eval_samples_per_second": 6.853, |
|
"eval_steps_per_second": 0.857, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 3.615652173913044e-06, |
|
"loss": 2.3639, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 2.431199550628662, |
|
"eval_runtime": 5.8179, |
|
"eval_samples_per_second": 6.875, |
|
"eval_steps_per_second": 0.859, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 3.4156521739130437e-06, |
|
"loss": 2.3475, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 2.3051953315734863, |
|
"eval_runtime": 5.7888, |
|
"eval_samples_per_second": 6.91, |
|
"eval_steps_per_second": 0.864, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 3.2156521739130435e-06, |
|
"loss": 2.3429, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 2.322197675704956, |
|
"eval_runtime": 5.7936, |
|
"eval_samples_per_second": 6.904, |
|
"eval_steps_per_second": 0.863, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 3.015652173913044e-06, |
|
"loss": 2.3115, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 2.3126182556152344, |
|
"eval_runtime": 5.8416, |
|
"eval_samples_per_second": 6.847, |
|
"eval_steps_per_second": 0.856, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 2.815652173913044e-06, |
|
"loss": 2.3276, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 2.3154168128967285, |
|
"eval_runtime": 5.7988, |
|
"eval_samples_per_second": 6.898, |
|
"eval_steps_per_second": 0.862, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 2.6156521739130438e-06, |
|
"loss": 2.3126, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 2.353442430496216, |
|
"eval_runtime": 5.7487, |
|
"eval_samples_per_second": 6.958, |
|
"eval_steps_per_second": 0.87, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 2.417391304347826e-06, |
|
"loss": 2.2934, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 2.2566468715667725, |
|
"eval_runtime": 5.902, |
|
"eval_samples_per_second": 6.777, |
|
"eval_steps_per_second": 0.847, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 2.2173913043478264e-06, |
|
"loss": 2.2901, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 2.274752140045166, |
|
"eval_runtime": 5.8191, |
|
"eval_samples_per_second": 6.874, |
|
"eval_steps_per_second": 0.859, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 2.017391304347826e-06, |
|
"loss": 2.2622, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 2.2620463371276855, |
|
"eval_runtime": 5.7456, |
|
"eval_samples_per_second": 6.962, |
|
"eval_steps_per_second": 0.87, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 1.8173913043478262e-06, |
|
"loss": 2.2707, |
|
"step": 4715 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_loss": 2.2336184978485107, |
|
"eval_runtime": 5.8344, |
|
"eval_samples_per_second": 6.856, |
|
"eval_steps_per_second": 0.857, |
|
"step": 4715 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 1.6173913043478262e-06, |
|
"loss": 2.2338, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 2.224193811416626, |
|
"eval_runtime": 5.7896, |
|
"eval_samples_per_second": 6.909, |
|
"eval_steps_per_second": 0.864, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 1.4173913043478262e-06, |
|
"loss": 2.2457, |
|
"step": 4945 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_loss": 2.219238758087158, |
|
"eval_runtime": 5.8096, |
|
"eval_samples_per_second": 6.885, |
|
"eval_steps_per_second": 0.861, |
|
"step": 4945 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 1.2173913043478262e-06, |
|
"loss": 2.227, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 2.206695079803467, |
|
"eval_runtime": 5.7447, |
|
"eval_samples_per_second": 6.963, |
|
"eval_steps_per_second": 0.87, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 1.0173913043478262e-06, |
|
"loss": 2.2215, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_loss": 2.2183449268341064, |
|
"eval_runtime": 5.7702, |
|
"eval_samples_per_second": 6.932, |
|
"eval_steps_per_second": 0.867, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 8.173913043478261e-07, |
|
"loss": 2.2075, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 2.2187769412994385, |
|
"eval_runtime": 5.7737, |
|
"eval_samples_per_second": 6.928, |
|
"eval_steps_per_second": 0.866, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 6.173913043478262e-07, |
|
"loss": 2.2286, |
|
"step": 5405 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_loss": 2.2306272983551025, |
|
"eval_runtime": 5.7527, |
|
"eval_samples_per_second": 6.953, |
|
"eval_steps_per_second": 0.869, |
|
"step": 5405 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 4.1739130434782616e-07, |
|
"loss": 2.2292, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 2.2159781455993652, |
|
"eval_runtime": 5.8705, |
|
"eval_samples_per_second": 6.814, |
|
"eval_steps_per_second": 0.852, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"learning_rate": 2.173913043478261e-07, |
|
"loss": 2.219, |
|
"step": 5635 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_loss": 2.2207822799682617, |
|
"eval_runtime": 5.7679, |
|
"eval_samples_per_second": 6.935, |
|
"eval_steps_per_second": 0.867, |
|
"step": 5635 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 1.739130434782609e-08, |
|
"loss": 2.2125, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 2.216578722000122, |
|
"eval_runtime": 5.8453, |
|
"eval_samples_per_second": 6.843, |
|
"eval_steps_per_second": 0.855, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"step": 5750, |
|
"total_flos": 1.098949102848e+19, |
|
"train_loss": 2.5100708697775134, |
|
"train_runtime": 4619.9285, |
|
"train_samples_per_second": 4.978, |
|
"train_steps_per_second": 1.245 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5750, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 1.098949102848e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|