{ "best_metric": 0.9782797567332754, "best_model_checkpoint": "convnext-large-224-finetuned-dog-vs-cat/checkpoint-492", "epoch": 2.986342943854325, "eval_steps": 500, "global_step": 492, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06069802731411229, "grad_norm": 1.2104277610778809, "learning_rate": 1e-05, "loss": 0.6885, "step": 10 }, { "epoch": 0.12139605462822459, "grad_norm": 1.1519063711166382, "learning_rate": 2e-05, "loss": 0.6854, "step": 20 }, { "epoch": 0.18209408194233687, "grad_norm": 1.203668475151062, "learning_rate": 3e-05, "loss": 0.6788, "step": 30 }, { "epoch": 0.24279210925644917, "grad_norm": 1.3230795860290527, "learning_rate": 4e-05, "loss": 0.6664, "step": 40 }, { "epoch": 0.30349013657056145, "grad_norm": 1.1332818269729614, "learning_rate": 5e-05, "loss": 0.6511, "step": 50 }, { "epoch": 0.36418816388467373, "grad_norm": 1.1424264907836914, "learning_rate": 4.8868778280542986e-05, "loss": 0.6321, "step": 60 }, { "epoch": 0.424886191198786, "grad_norm": 1.1293511390686035, "learning_rate": 4.7737556561085976e-05, "loss": 0.6115, "step": 70 }, { "epoch": 0.48558421851289835, "grad_norm": 1.1212339401245117, "learning_rate": 4.660633484162896e-05, "loss": 0.5957, "step": 80 }, { "epoch": 0.5462822458270106, "grad_norm": 0.9936708807945251, "learning_rate": 4.547511312217195e-05, "loss": 0.5815, "step": 90 }, { "epoch": 0.6069802731411229, "grad_norm": 1.0561779737472534, "learning_rate": 4.434389140271493e-05, "loss": 0.5583, "step": 100 }, { "epoch": 0.6676783004552352, "grad_norm": 0.9779506921768188, "learning_rate": 4.321266968325792e-05, "loss": 0.5541, "step": 110 }, { "epoch": 0.7283763277693475, "grad_norm": 0.8941373825073242, "learning_rate": 4.2081447963800907e-05, "loss": 0.5408, "step": 120 }, { "epoch": 0.7890743550834598, "grad_norm": 0.9414535164833069, "learning_rate": 4.095022624434389e-05, "loss": 0.5288, "step": 130 }, { "epoch": 0.849772382397572, "grad_norm": 0.8809621334075928, "learning_rate": 3.981900452488688e-05, "loss": 0.5194, "step": 140 }, { "epoch": 0.9104704097116844, "grad_norm": 0.8484501838684082, "learning_rate": 3.868778280542987e-05, "loss": 0.5038, "step": 150 }, { "epoch": 0.9711684370257967, "grad_norm": 0.8708747625350952, "learning_rate": 3.7556561085972854e-05, "loss": 0.496, "step": 160 }, { "epoch": 0.9954476479514416, "eval_loss": 0.4791725277900696, "eval_recall": 0.9200695047784535, "eval_runtime": 60.2024, "eval_samples_per_second": 38.885, "eval_steps_per_second": 1.229, "step": 164 }, { "epoch": 1.031866464339909, "grad_norm": 0.8436847925186157, "learning_rate": 3.642533936651584e-05, "loss": 0.4861, "step": 170 }, { "epoch": 1.0925644916540211, "grad_norm": 0.7819939851760864, "learning_rate": 3.529411764705883e-05, "loss": 0.4844, "step": 180 }, { "epoch": 1.1532625189681336, "grad_norm": 0.847568929195404, "learning_rate": 3.416289592760181e-05, "loss": 0.4673, "step": 190 }, { "epoch": 1.2139605462822458, "grad_norm": 0.7669057846069336, "learning_rate": 3.3031674208144794e-05, "loss": 0.4583, "step": 200 }, { "epoch": 1.274658573596358, "grad_norm": 0.766395092010498, "learning_rate": 3.1900452488687784e-05, "loss": 0.4473, "step": 210 }, { "epoch": 1.3353566009104705, "grad_norm": 0.8044094443321228, "learning_rate": 3.0769230769230774e-05, "loss": 0.4531, "step": 220 }, { "epoch": 1.3960546282245827, "grad_norm": 0.7774869799613953, "learning_rate": 2.9638009049773758e-05, "loss": 0.439, "step": 230 }, { "epoch": 1.456752655538695, "grad_norm": 0.7320767641067505, "learning_rate": 2.850678733031674e-05, "loss": 0.436, "step": 240 }, { "epoch": 1.5174506828528074, "grad_norm": 0.7127722501754761, "learning_rate": 2.737556561085973e-05, "loss": 0.4314, "step": 250 }, { "epoch": 1.5781487101669196, "grad_norm": 0.7172250151634216, "learning_rate": 2.6244343891402718e-05, "loss": 0.4302, "step": 260 }, { "epoch": 1.6388467374810318, "grad_norm": 0.7075967192649841, "learning_rate": 2.51131221719457e-05, "loss": 0.4174, "step": 270 }, { "epoch": 1.699544764795144, "grad_norm": 0.7165055871009827, "learning_rate": 2.3981900452488688e-05, "loss": 0.4174, "step": 280 }, { "epoch": 1.7602427921092565, "grad_norm": 0.6554096341133118, "learning_rate": 2.2850678733031675e-05, "loss": 0.4193, "step": 290 }, { "epoch": 1.8209408194233687, "grad_norm": 0.7225818037986755, "learning_rate": 2.1719457013574662e-05, "loss": 0.3996, "step": 300 }, { "epoch": 1.8816388467374812, "grad_norm": 0.6873008012771606, "learning_rate": 2.058823529411765e-05, "loss": 0.4011, "step": 310 }, { "epoch": 1.9423368740515934, "grad_norm": 0.6827677488327026, "learning_rate": 1.9457013574660635e-05, "loss": 0.3996, "step": 320 }, { "epoch": 1.9969650986342944, "eval_loss": 0.3836117684841156, "eval_recall": 0.9765421372719374, "eval_runtime": 60.251, "eval_samples_per_second": 38.854, "eval_steps_per_second": 1.228, "step": 329 }, { "epoch": 2.0030349013657056, "grad_norm": 0.6939449906349182, "learning_rate": 1.832579185520362e-05, "loss": 0.4015, "step": 330 }, { "epoch": 2.063732928679818, "grad_norm": 0.6604830026626587, "learning_rate": 1.7194570135746606e-05, "loss": 0.3934, "step": 340 }, { "epoch": 2.12443095599393, "grad_norm": 0.6591904759407043, "learning_rate": 1.6063348416289596e-05, "loss": 0.3956, "step": 350 }, { "epoch": 2.1851289833080423, "grad_norm": 0.6776930689811707, "learning_rate": 1.493212669683258e-05, "loss": 0.3904, "step": 360 }, { "epoch": 2.245827010622155, "grad_norm": 0.6641884446144104, "learning_rate": 1.3800904977375568e-05, "loss": 0.3836, "step": 370 }, { "epoch": 2.306525037936267, "grad_norm": 0.6640185117721558, "learning_rate": 1.2669683257918553e-05, "loss": 0.3849, "step": 380 }, { "epoch": 2.3672230652503794, "grad_norm": 0.6439262628555298, "learning_rate": 1.153846153846154e-05, "loss": 0.3847, "step": 390 }, { "epoch": 2.4279210925644916, "grad_norm": 0.6991675496101379, "learning_rate": 1.0407239819004526e-05, "loss": 0.3789, "step": 400 }, { "epoch": 2.488619119878604, "grad_norm": 0.6298201680183411, "learning_rate": 9.276018099547511e-06, "loss": 0.3769, "step": 410 }, { "epoch": 2.549317147192716, "grad_norm": 0.6205734014511108, "learning_rate": 8.144796380090498e-06, "loss": 0.3769, "step": 420 }, { "epoch": 2.6100151745068283, "grad_norm": 0.6230446696281433, "learning_rate": 7.013574660633485e-06, "loss": 0.3802, "step": 430 }, { "epoch": 2.670713201820941, "grad_norm": 0.6412050127983093, "learning_rate": 5.882352941176471e-06, "loss": 0.3805, "step": 440 }, { "epoch": 2.731411229135053, "grad_norm": 0.6700997948646545, "learning_rate": 4.751131221719457e-06, "loss": 0.3725, "step": 450 }, { "epoch": 2.7921092564491654, "grad_norm": 0.6392331123352051, "learning_rate": 3.619909502262444e-06, "loss": 0.3729, "step": 460 }, { "epoch": 2.8528072837632776, "grad_norm": 0.6410810351371765, "learning_rate": 2.48868778280543e-06, "loss": 0.3582, "step": 470 }, { "epoch": 2.91350531107739, "grad_norm": 0.6067666411399841, "learning_rate": 1.3574660633484164e-06, "loss": 0.3758, "step": 480 }, { "epoch": 2.9742033383915025, "grad_norm": 0.6219173669815063, "learning_rate": 2.2624434389140275e-07, "loss": 0.373, "step": 490 }, { "epoch": 2.986342943854325, "eval_loss": 0.35858574509620667, "eval_recall": 0.9782797567332754, "eval_runtime": 61.7184, "eval_samples_per_second": 37.93, "eval_steps_per_second": 1.199, "step": 492 }, { "epoch": 2.986342943854325, "step": 492, "total_flos": 1.1154611028241981e+19, "train_loss": 0.4681245271510225, "train_runtime": 1816.2465, "train_samples_per_second": 34.801, "train_steps_per_second": 0.271 } ], "logging_steps": 10, "max_steps": 492, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1154611028241981e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }