{ "best_metric": 0.9714285714285714, "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-375", "epoch": 5.166666666666667, "eval_steps": 500, "global_step": 450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 9.09181022644043, "learning_rate": 1.1111111111111112e-05, "loss": 2.3453, "step": 10 }, { "epoch": 0.04, "grad_norm": 9.389509201049805, "learning_rate": 2.2222222222222223e-05, "loss": 2.3958, "step": 20 }, { "epoch": 0.07, "grad_norm": 9.11400032043457, "learning_rate": 3.3333333333333335e-05, "loss": 2.2492, "step": 30 }, { "epoch": 0.09, "grad_norm": 9.454550743103027, "learning_rate": 4.4444444444444447e-05, "loss": 2.1811, "step": 40 }, { "epoch": 0.11, "grad_norm": 12.871296882629395, "learning_rate": 4.938271604938271e-05, "loss": 2.1261, "step": 50 }, { "epoch": 0.13, "grad_norm": 9.507197380065918, "learning_rate": 4.814814814814815e-05, "loss": 2.0149, "step": 60 }, { "epoch": 0.16, "grad_norm": 12.240191459655762, "learning_rate": 4.691358024691358e-05, "loss": 1.9248, "step": 70 }, { "epoch": 0.17, "eval_accuracy": 0.3142857142857143, "eval_loss": 1.6910260915756226, "eval_runtime": 13.1007, "eval_samples_per_second": 5.343, "eval_steps_per_second": 1.374, "step": 75 }, { "epoch": 1.01, "grad_norm": 13.400834083557129, "learning_rate": 4.567901234567901e-05, "loss": 1.4961, "step": 80 }, { "epoch": 1.03, "grad_norm": 12.832711219787598, "learning_rate": 4.4444444444444447e-05, "loss": 1.4544, "step": 90 }, { "epoch": 1.06, "grad_norm": 11.878938674926758, "learning_rate": 4.3209876543209875e-05, "loss": 1.3226, "step": 100 }, { "epoch": 1.08, "grad_norm": 8.267742156982422, "learning_rate": 4.197530864197531e-05, "loss": 0.9212, "step": 110 }, { "epoch": 1.1, "grad_norm": 15.088278770446777, "learning_rate": 4.074074074074074e-05, "loss": 0.8928, "step": 120 }, { "epoch": 1.12, "grad_norm": 14.688928604125977, "learning_rate": 3.950617283950617e-05, "loss": 0.6026, "step": 130 }, { "epoch": 1.14, "grad_norm": 10.090428352355957, "learning_rate": 3.82716049382716e-05, "loss": 0.7323, "step": 140 }, { "epoch": 1.17, "grad_norm": 28.017093658447266, "learning_rate": 3.7037037037037037e-05, "loss": 1.1184, "step": 150 }, { "epoch": 1.17, "eval_accuracy": 0.7, "eval_loss": 0.8519060611724854, "eval_runtime": 12.8721, "eval_samples_per_second": 5.438, "eval_steps_per_second": 1.398, "step": 150 }, { "epoch": 2.02, "grad_norm": 7.826801776885986, "learning_rate": 3.580246913580247e-05, "loss": 0.6328, "step": 160 }, { "epoch": 2.04, "grad_norm": 17.076839447021484, "learning_rate": 3.45679012345679e-05, "loss": 0.5843, "step": 170 }, { "epoch": 2.07, "grad_norm": 8.304590225219727, "learning_rate": 3.3333333333333335e-05, "loss": 0.7969, "step": 180 }, { "epoch": 2.09, "grad_norm": 7.5533223152160645, "learning_rate": 3.209876543209876e-05, "loss": 0.5387, "step": 190 }, { "epoch": 2.11, "grad_norm": 3.7337615489959717, "learning_rate": 3.08641975308642e-05, "loss": 0.5673, "step": 200 }, { "epoch": 2.13, "grad_norm": 23.18366050720215, "learning_rate": 2.962962962962963e-05, "loss": 0.5191, "step": 210 }, { "epoch": 2.16, "grad_norm": 11.092031478881836, "learning_rate": 2.839506172839506e-05, "loss": 0.3505, "step": 220 }, { "epoch": 2.17, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.5482387542724609, "eval_runtime": 12.8506, "eval_samples_per_second": 5.447, "eval_steps_per_second": 1.401, "step": 225 }, { "epoch": 3.01, "grad_norm": 1.397813320159912, "learning_rate": 2.7160493827160493e-05, "loss": 0.2652, "step": 230 }, { "epoch": 3.03, "grad_norm": 2.130690574645996, "learning_rate": 2.5925925925925925e-05, "loss": 0.1691, "step": 240 }, { "epoch": 3.06, "grad_norm": 7.323586940765381, "learning_rate": 2.4691358024691357e-05, "loss": 0.1669, "step": 250 }, { "epoch": 3.08, "grad_norm": 5.903147220611572, "learning_rate": 2.345679012345679e-05, "loss": 0.324, "step": 260 }, { "epoch": 3.1, "grad_norm": 24.850133895874023, "learning_rate": 2.2222222222222223e-05, "loss": 0.4857, "step": 270 }, { "epoch": 3.12, "grad_norm": 1.1520411968231201, "learning_rate": 2.0987654320987655e-05, "loss": 0.231, "step": 280 }, { "epoch": 3.14, "grad_norm": 0.6035730838775635, "learning_rate": 1.9753086419753087e-05, "loss": 0.3852, "step": 290 }, { "epoch": 3.17, "grad_norm": 18.224578857421875, "learning_rate": 1.8518518518518518e-05, "loss": 0.2939, "step": 300 }, { "epoch": 3.17, "eval_accuracy": 0.8285714285714286, "eval_loss": 0.5074731111526489, "eval_runtime": 12.6199, "eval_samples_per_second": 5.547, "eval_steps_per_second": 1.426, "step": 300 }, { "epoch": 4.02, "grad_norm": 4.320995330810547, "learning_rate": 1.728395061728395e-05, "loss": 0.1894, "step": 310 }, { "epoch": 4.04, "grad_norm": 4.220107555389404, "learning_rate": 1.604938271604938e-05, "loss": 0.2997, "step": 320 }, { "epoch": 4.07, "grad_norm": 0.14674872159957886, "learning_rate": 1.4814814814814815e-05, "loss": 0.2471, "step": 330 }, { "epoch": 4.09, "grad_norm": 1.210436224937439, "learning_rate": 1.3580246913580247e-05, "loss": 0.1045, "step": 340 }, { "epoch": 4.11, "grad_norm": 0.28732016682624817, "learning_rate": 1.2345679012345678e-05, "loss": 0.0638, "step": 350 }, { "epoch": 4.13, "grad_norm": 0.1985640674829483, "learning_rate": 1.1111111111111112e-05, "loss": 0.1419, "step": 360 }, { "epoch": 4.16, "grad_norm": 0.1826096773147583, "learning_rate": 9.876543209876543e-06, "loss": 0.0447, "step": 370 }, { "epoch": 4.17, "eval_accuracy": 0.9714285714285714, "eval_loss": 0.1740979254245758, "eval_runtime": 12.7121, "eval_samples_per_second": 5.507, "eval_steps_per_second": 1.416, "step": 375 }, { "epoch": 5.01, "grad_norm": 0.4490085244178772, "learning_rate": 8.641975308641975e-06, "loss": 0.0338, "step": 380 }, { "epoch": 5.03, "grad_norm": 0.615115761756897, "learning_rate": 7.4074074074074075e-06, "loss": 0.1366, "step": 390 }, { "epoch": 5.06, "grad_norm": 0.19469819962978363, "learning_rate": 6.172839506172839e-06, "loss": 0.2805, "step": 400 }, { "epoch": 5.08, "grad_norm": 0.10871770232915878, "learning_rate": 4.938271604938272e-06, "loss": 0.1858, "step": 410 }, { "epoch": 5.1, "grad_norm": 1.5411165952682495, "learning_rate": 3.7037037037037037e-06, "loss": 0.0189, "step": 420 }, { "epoch": 5.12, "grad_norm": 10.748401641845703, "learning_rate": 2.469135802469136e-06, "loss": 0.0882, "step": 430 }, { "epoch": 5.14, "grad_norm": 0.13508407771587372, "learning_rate": 1.234567901234568e-06, "loss": 0.1669, "step": 440 }, { "epoch": 5.17, "grad_norm": 6.92973518371582, "learning_rate": 0.0, "loss": 0.0643, "step": 450 }, { "epoch": 5.17, "eval_accuracy": 0.9571428571428572, "eval_loss": 0.1370178759098053, "eval_runtime": 12.713, "eval_samples_per_second": 5.506, "eval_steps_per_second": 1.416, "step": 450 }, { "epoch": 5.17, "step": 450, "total_flos": 2.243076282187776e+18, "train_loss": 0.7145391458272934, "train_runtime": 727.1717, "train_samples_per_second": 2.475, "train_steps_per_second": 0.619 }, { "epoch": 5.17, "eval_accuracy": 0.8516129032258064, "eval_loss": 0.6185441613197327, "eval_runtime": 29.0205, "eval_samples_per_second": 5.341, "eval_steps_per_second": 1.344, "step": 450 }, { "epoch": 5.17, "eval_accuracy": 0.8516129032258064, "eval_loss": 0.6185442209243774, "eval_runtime": 28.1289, "eval_samples_per_second": 5.51, "eval_steps_per_second": 1.386, "step": 450 } ], "logging_steps": 10, "max_steps": 450, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 2.243076282187776e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }