|
{ |
|
"best_metric": 0.9782797567332754, |
|
"best_model_checkpoint": "convnext-large-224-finetuned-dog-vs-cat/checkpoint-492", |
|
"epoch": 2.986342943854325, |
|
"eval_steps": 500, |
|
"global_step": 492, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06069802731411229, |
|
"grad_norm": 1.2104277610778809, |
|
"learning_rate": 1e-05, |
|
"loss": 0.6885, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12139605462822459, |
|
"grad_norm": 1.1519063711166382, |
|
"learning_rate": 2e-05, |
|
"loss": 0.6854, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18209408194233687, |
|
"grad_norm": 1.203668475151062, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6788, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.24279210925644917, |
|
"grad_norm": 1.3230795860290527, |
|
"learning_rate": 4e-05, |
|
"loss": 0.6664, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.30349013657056145, |
|
"grad_norm": 1.1332818269729614, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6511, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.36418816388467373, |
|
"grad_norm": 1.1424264907836914, |
|
"learning_rate": 4.8868778280542986e-05, |
|
"loss": 0.6321, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.424886191198786, |
|
"grad_norm": 1.1293511390686035, |
|
"learning_rate": 4.7737556561085976e-05, |
|
"loss": 0.6115, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.48558421851289835, |
|
"grad_norm": 1.1212339401245117, |
|
"learning_rate": 4.660633484162896e-05, |
|
"loss": 0.5957, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5462822458270106, |
|
"grad_norm": 0.9936708807945251, |
|
"learning_rate": 4.547511312217195e-05, |
|
"loss": 0.5815, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6069802731411229, |
|
"grad_norm": 1.0561779737472534, |
|
"learning_rate": 4.434389140271493e-05, |
|
"loss": 0.5583, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6676783004552352, |
|
"grad_norm": 0.9779506921768188, |
|
"learning_rate": 4.321266968325792e-05, |
|
"loss": 0.5541, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7283763277693475, |
|
"grad_norm": 0.8941373825073242, |
|
"learning_rate": 4.2081447963800907e-05, |
|
"loss": 0.5408, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7890743550834598, |
|
"grad_norm": 0.9414535164833069, |
|
"learning_rate": 4.095022624434389e-05, |
|
"loss": 0.5288, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.849772382397572, |
|
"grad_norm": 0.8809621334075928, |
|
"learning_rate": 3.981900452488688e-05, |
|
"loss": 0.5194, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9104704097116844, |
|
"grad_norm": 0.8484501838684082, |
|
"learning_rate": 3.868778280542987e-05, |
|
"loss": 0.5038, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9711684370257967, |
|
"grad_norm": 0.8708747625350952, |
|
"learning_rate": 3.7556561085972854e-05, |
|
"loss": 0.496, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9954476479514416, |
|
"eval_loss": 0.4791725277900696, |
|
"eval_recall": 0.9200695047784535, |
|
"eval_runtime": 60.2024, |
|
"eval_samples_per_second": 38.885, |
|
"eval_steps_per_second": 1.229, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.031866464339909, |
|
"grad_norm": 0.8436847925186157, |
|
"learning_rate": 3.642533936651584e-05, |
|
"loss": 0.4861, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0925644916540211, |
|
"grad_norm": 0.7819939851760864, |
|
"learning_rate": 3.529411764705883e-05, |
|
"loss": 0.4844, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.1532625189681336, |
|
"grad_norm": 0.847568929195404, |
|
"learning_rate": 3.416289592760181e-05, |
|
"loss": 0.4673, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.2139605462822458, |
|
"grad_norm": 0.7669057846069336, |
|
"learning_rate": 3.3031674208144794e-05, |
|
"loss": 0.4583, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.274658573596358, |
|
"grad_norm": 0.766395092010498, |
|
"learning_rate": 3.1900452488687784e-05, |
|
"loss": 0.4473, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.3353566009104705, |
|
"grad_norm": 0.8044094443321228, |
|
"learning_rate": 3.0769230769230774e-05, |
|
"loss": 0.4531, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.3960546282245827, |
|
"grad_norm": 0.7774869799613953, |
|
"learning_rate": 2.9638009049773758e-05, |
|
"loss": 0.439, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.456752655538695, |
|
"grad_norm": 0.7320767641067505, |
|
"learning_rate": 2.850678733031674e-05, |
|
"loss": 0.436, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.5174506828528074, |
|
"grad_norm": 0.7127722501754761, |
|
"learning_rate": 2.737556561085973e-05, |
|
"loss": 0.4314, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.5781487101669196, |
|
"grad_norm": 0.7172250151634216, |
|
"learning_rate": 2.6244343891402718e-05, |
|
"loss": 0.4302, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.6388467374810318, |
|
"grad_norm": 0.7075967192649841, |
|
"learning_rate": 2.51131221719457e-05, |
|
"loss": 0.4174, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.699544764795144, |
|
"grad_norm": 0.7165055871009827, |
|
"learning_rate": 2.3981900452488688e-05, |
|
"loss": 0.4174, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.7602427921092565, |
|
"grad_norm": 0.6554096341133118, |
|
"learning_rate": 2.2850678733031675e-05, |
|
"loss": 0.4193, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.8209408194233687, |
|
"grad_norm": 0.7225818037986755, |
|
"learning_rate": 2.1719457013574662e-05, |
|
"loss": 0.3996, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.8816388467374812, |
|
"grad_norm": 0.6873008012771606, |
|
"learning_rate": 2.058823529411765e-05, |
|
"loss": 0.4011, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.9423368740515934, |
|
"grad_norm": 0.6827677488327026, |
|
"learning_rate": 1.9457013574660635e-05, |
|
"loss": 0.3996, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.9969650986342944, |
|
"eval_loss": 0.3836117684841156, |
|
"eval_recall": 0.9765421372719374, |
|
"eval_runtime": 60.251, |
|
"eval_samples_per_second": 38.854, |
|
"eval_steps_per_second": 1.228, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 2.0030349013657056, |
|
"grad_norm": 0.6939449906349182, |
|
"learning_rate": 1.832579185520362e-05, |
|
"loss": 0.4015, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.063732928679818, |
|
"grad_norm": 0.6604830026626587, |
|
"learning_rate": 1.7194570135746606e-05, |
|
"loss": 0.3934, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.12443095599393, |
|
"grad_norm": 0.6591904759407043, |
|
"learning_rate": 1.6063348416289596e-05, |
|
"loss": 0.3956, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.1851289833080423, |
|
"grad_norm": 0.6776930689811707, |
|
"learning_rate": 1.493212669683258e-05, |
|
"loss": 0.3904, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.245827010622155, |
|
"grad_norm": 0.6641884446144104, |
|
"learning_rate": 1.3800904977375568e-05, |
|
"loss": 0.3836, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.306525037936267, |
|
"grad_norm": 0.6640185117721558, |
|
"learning_rate": 1.2669683257918553e-05, |
|
"loss": 0.3849, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.3672230652503794, |
|
"grad_norm": 0.6439262628555298, |
|
"learning_rate": 1.153846153846154e-05, |
|
"loss": 0.3847, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.4279210925644916, |
|
"grad_norm": 0.6991675496101379, |
|
"learning_rate": 1.0407239819004526e-05, |
|
"loss": 0.3789, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.488619119878604, |
|
"grad_norm": 0.6298201680183411, |
|
"learning_rate": 9.276018099547511e-06, |
|
"loss": 0.3769, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.549317147192716, |
|
"grad_norm": 0.6205734014511108, |
|
"learning_rate": 8.144796380090498e-06, |
|
"loss": 0.3769, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.6100151745068283, |
|
"grad_norm": 0.6230446696281433, |
|
"learning_rate": 7.013574660633485e-06, |
|
"loss": 0.3802, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.670713201820941, |
|
"grad_norm": 0.6412050127983093, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 0.3805, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.731411229135053, |
|
"grad_norm": 0.6700997948646545, |
|
"learning_rate": 4.751131221719457e-06, |
|
"loss": 0.3725, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.7921092564491654, |
|
"grad_norm": 0.6392331123352051, |
|
"learning_rate": 3.619909502262444e-06, |
|
"loss": 0.3729, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.8528072837632776, |
|
"grad_norm": 0.6410810351371765, |
|
"learning_rate": 2.48868778280543e-06, |
|
"loss": 0.3582, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.91350531107739, |
|
"grad_norm": 0.6067666411399841, |
|
"learning_rate": 1.3574660633484164e-06, |
|
"loss": 0.3758, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.9742033383915025, |
|
"grad_norm": 0.6219173669815063, |
|
"learning_rate": 2.2624434389140275e-07, |
|
"loss": 0.373, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.986342943854325, |
|
"eval_loss": 0.35858574509620667, |
|
"eval_recall": 0.9782797567332754, |
|
"eval_runtime": 61.7184, |
|
"eval_samples_per_second": 37.93, |
|
"eval_steps_per_second": 1.199, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 2.986342943854325, |
|
"step": 492, |
|
"total_flos": 1.1154611028241981e+19, |
|
"train_loss": 0.4681245271510225, |
|
"train_runtime": 1816.2465, |
|
"train_samples_per_second": 34.801, |
|
"train_steps_per_second": 0.271 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 492, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1154611028241981e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|