diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1bd3e79c9932a132c284924b1ffd82e8e6005f6 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d215462475f1d6bb29bf799908c221488163d6e780abe79a796b6fe2a71204c1 +size 198025308 diff --git a/run-03pb2rjn/checkpoint-1232/model.safetensors b/run-03pb2rjn/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6390acd8f13cdbeee2b143e33f6ce38c1799df7c --- /dev/null +++ b/run-03pb2rjn/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59f88c4f450ef9f901b2ae4e10c2ebc62aaf0dbb6185e442cae281cead39dd1d +size 198025308 diff --git a/run-03pb2rjn/checkpoint-1232/optimizer.pt b/run-03pb2rjn/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..859bf3d7301e163b1d5c0c36f406892f33ae4044 --- /dev/null +++ b/run-03pb2rjn/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a2e770e79b07ea6a2763c0e1cc59c9cbd10a9ef34a9fda0e81b4710cc11e3a9 +size 395900602 diff --git a/run-03pb2rjn/checkpoint-1232/rng_state.pth b/run-03pb2rjn/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-03pb2rjn/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-03pb2rjn/checkpoint-1232/scheduler.pt b/run-03pb2rjn/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2bc316a55131d200bdaeb969f85d3b469fbd1903 --- /dev/null +++ b/run-03pb2rjn/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd700e1f1768b528d6a892cabb64f554f81bf377c5a1b46a76a44fcce36cbc1b +size 1064 diff --git a/run-03pb2rjn/checkpoint-1232/trainer_state.json b/run-03pb2rjn/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f79b74dceb0baedcf3d01b137449b6ca9c0e7a0d --- /dev/null +++ b/run-03pb2rjn/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.914079822616408, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-03pb2rjn/checkpoint-85", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 5.406622924584476e-05, + "loss": 1.2456, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8852549889135255, + "eval_loss": 0.908083438873291, + "eval_runtime": 7.1904, + "eval_samples_per_second": 501.78, + "eval_steps_per_second": 7.927, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00010813245849168952, + "loss": 0.8815, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00016219868773753427, + "loss": 0.8142, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8091986775398254, + "eval_runtime": 6.629, + "eval_samples_per_second": 544.278, + "eval_steps_per_second": 8.599, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00021626491698337904, + "loss": 0.8038, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9079822616407982, + "eval_loss": 0.8263628482818604, + "eval_runtime": 7.0923, + "eval_samples_per_second": 508.721, + "eval_steps_per_second": 8.037, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0002703311462292238, + "loss": 0.7993, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00032439737547506855, + "loss": 0.7892, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7962860310421286, + "eval_loss": 1.0035172700881958, + "eval_runtime": 6.9076, + "eval_samples_per_second": 522.32, + "eval_steps_per_second": 8.252, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003784636047209133, + "loss": 0.7847, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0004325298339667581, + "loss": 0.7907, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9004988913525499, + "eval_loss": 0.8406698107719421, + "eval_runtime": 6.9242, + "eval_samples_per_second": 521.074, + "eval_steps_per_second": 8.232, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00048659606321260285, + "loss": 0.788, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8902439024390244, + "eval_loss": 0.8500009179115295, + "eval_runtime": 6.5523, + "eval_samples_per_second": 550.643, + "eval_steps_per_second": 8.699, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0005406622924584476, + "loss": 0.7905, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0005947285217042924, + "loss": 0.7903, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8722283813747228, + "eval_loss": 0.8836174607276917, + "eval_runtime": 6.8478, + "eval_samples_per_second": 526.885, + "eval_steps_per_second": 8.324, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0006487947509501371, + "loss": 0.8044, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0007028609801959819, + "loss": 0.8005, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8386917960088692, + "eval_loss": 0.9253634214401245, + "eval_runtime": 6.6711, + "eval_samples_per_second": 540.836, + "eval_steps_per_second": 8.544, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0007569272094418266, + "loss": 0.8074, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8539356984478935, + "eval_loss": 0.9217946529388428, + "eval_runtime": 6.8305, + "eval_samples_per_second": 528.221, + "eval_steps_per_second": 8.345, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0008109934386876715, + "loss": 0.8214, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0008650596679335162, + "loss": 0.8211, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8373059866962306, + "eval_loss": 1.0037873983383179, + "eval_runtime": 6.9272, + "eval_samples_per_second": 520.846, + "eval_steps_per_second": 8.228, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.000871900198866313, + "loss": 0.8096, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.873059866962306, + "eval_loss": 0.8749555945396423, + "eval_runtime": 6.9276, + "eval_samples_per_second": 520.816, + "eval_steps_per_second": 8.228, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0008663597810925184, + "loss": 0.8142, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0008567597779112751, + "loss": 0.8129, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8395232815964523, + "eval_loss": 0.9741567373275757, + "eval_runtime": 6.6401, + "eval_samples_per_second": 543.364, + "eval_steps_per_second": 8.584, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0008431908913533095, + "loss": 0.8176, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0008257813219558784, + "loss": 0.812, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.871119733924612, + "eval_loss": 0.8878970742225647, + "eval_runtime": 6.8805, + "eval_samples_per_second": 524.377, + "eval_steps_per_second": 8.284, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0008046955575080593, + "loss": 0.7965, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8503325942350333, + "eval_loss": 0.9190165400505066, + "eval_runtime": 7.1454, + "eval_samples_per_second": 504.939, + "eval_steps_per_second": 7.977, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0007801328189495462, + "loss": 0.8128, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0007523251781062956, + "loss": 0.7996, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8924611973392461, + "eval_loss": 0.8424035906791687, + "eval_runtime": 6.9445, + "eval_samples_per_second": 519.551, + "eval_steps_per_second": 8.208, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0007215353650468892, + "loss": 0.7975, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0006880542857759862, + "loss": 0.7944, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8744456762749445, + "eval_loss": 0.8793225288391113, + "eval_runtime": 6.6692, + "eval_samples_per_second": 540.99, + "eval_steps_per_second": 8.547, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0006521982737180083, + "loss": 0.787, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8824833702882483, + "eval_loss": 0.8486706614494324, + "eval_runtime": 6.908, + "eval_samples_per_second": 522.295, + "eval_steps_per_second": 8.251, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0006143061009593818, + "loss": 0.7792, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0005747357774874944, + "loss": 0.7753, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.8869179600886918, + "eval_loss": 0.8535000085830688, + "eval_runtime": 6.9297, + "eval_samples_per_second": 520.654, + "eval_steps_per_second": 8.225, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0005338611686675588, + "loss": 0.7639, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.0004920684629158832, + "loss": 0.7647, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9018847006651884, + "eval_loss": 0.8282992839813232, + "eval_runtime": 6.784, + "eval_samples_per_second": 531.841, + "eval_steps_per_second": 8.402, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00044975252294341203, + "loss": 0.7658, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8891352549889135, + "eval_loss": 0.8540334105491638, + "eval_runtime": 6.87, + "eval_samples_per_second": 525.183, + "eval_steps_per_second": 8.297, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00040731315504344935, + "loss": 0.7538, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00036515133167179504, + "loss": 0.7442, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.8938470066518847, + "eval_loss": 0.8362773060798645, + "eval_runtime": 7.0082, + "eval_samples_per_second": 514.823, + "eval_steps_per_second": 8.133, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.000323665403008833, + "loss": 0.7353, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9016075388026608, + "eval_loss": 0.8352402448654175, + "eval_runtime": 6.9197, + "eval_samples_per_second": 521.408, + "eval_steps_per_second": 8.237, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0002832473332972056, + "loss": 0.7395, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.000244278997514626, + "loss": 0.7326, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8239282965660095, + "eval_runtime": 6.561, + "eval_samples_per_second": 549.915, + "eval_steps_per_second": 8.688, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.0002071285733713279, + "loss": 0.7338, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.0001721470627210114, + "loss": 0.7224, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8245892524719238, + "eval_runtime": 6.5794, + "eval_samples_per_second": 548.381, + "eval_steps_per_second": 8.663, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 0.00013966497525142913, + "loss": 0.7165, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.8174624443054199, + "eval_runtime": 6.9114, + "eval_samples_per_second": 522.036, + "eval_steps_per_second": 8.247, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 0.00010998920578752054, + "loss": 0.717, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 8.340013471072141e-05, + "loss": 0.711, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.8170289397239685, + "eval_runtime": 6.8654, + "eval_samples_per_second": 525.535, + "eval_steps_per_second": 8.303, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 6.014897889005363e-05, + "loss": 0.7115, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 4.0455418153727286e-05, + "loss": 0.7106, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8164230585098267, + "eval_runtime": 6.9054, + "eval_samples_per_second": 522.493, + "eval_steps_per_second": 8.254, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.450551972664773e-05, + "loss": 0.707, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9088137472283814, + "eval_loss": 0.8162174820899963, + "eval_runtime": 7.0016, + "eval_samples_per_second": 515.309, + "eval_steps_per_second": 8.141, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.244998024400373e-05, + "loss": 0.7017, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 4.402701950610671e-06, + "loss": 0.702, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9101995565410199, + "eval_loss": 0.8174305558204651, + "eval_runtime": 6.8715, + "eval_samples_per_second": 525.068, + "eval_steps_per_second": 8.295, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0008733775493559538, + "metric": "eval/loss", + "warmup_ratio": 0.3328703828976002 + } +} diff --git a/run-03pb2rjn/checkpoint-1232/training_args.bin b/run-03pb2rjn/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3d41af12aa149d39dfe79db1c6fde3c6cbd1ba75 --- /dev/null +++ b/run-03pb2rjn/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f790c3845918a315719ceecd0fdbf0a1a8fa12de0e64e0248d8e4265fdc5885b +size 4792 diff --git a/run-03pb2rjn/checkpoint-1260/model.safetensors b/run-03pb2rjn/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6817d57543bbe5a4c069ab8f7b805d82801aa0e --- /dev/null +++ b/run-03pb2rjn/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63841cb5aa23b01cf9dcd0d15d516e306e5f6febf74149cbf42f645b0c1cea2f +size 198025308 diff --git a/run-03pb2rjn/checkpoint-1260/optimizer.pt b/run-03pb2rjn/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd9ffe20306e921c17f968dc2508131caa97deca --- /dev/null +++ b/run-03pb2rjn/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf803b29ab9913e181120e8ca5362cc4dbdd991984a7c240b70775709d6c3caf +size 395900602 diff --git a/run-03pb2rjn/checkpoint-1260/rng_state.pth b/run-03pb2rjn/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-03pb2rjn/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-03pb2rjn/checkpoint-1260/scheduler.pt b/run-03pb2rjn/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae802a5a5b9d4f4ebf44d4a1497d76c0c8eff214 --- /dev/null +++ b/run-03pb2rjn/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c257389676b3d8a7311d027a866546762fd2586818c6cf9d6c0acb1686182b64 +size 1064 diff --git a/run-03pb2rjn/checkpoint-1260/trainer_state.json b/run-03pb2rjn/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..312015e14cc34db6b42f0f10aa3f958670a74af1 --- /dev/null +++ b/run-03pb2rjn/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9154656319290465, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-03pb2rjn/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 5.406622924584476e-05, + "loss": 1.2456, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8852549889135255, + "eval_loss": 0.908083438873291, + "eval_runtime": 7.1904, + "eval_samples_per_second": 501.78, + "eval_steps_per_second": 7.927, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00010813245849168952, + "loss": 0.8815, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00016219868773753427, + "loss": 0.8142, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8091986775398254, + "eval_runtime": 6.629, + "eval_samples_per_second": 544.278, + "eval_steps_per_second": 8.599, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00021626491698337904, + "loss": 0.8038, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9079822616407982, + "eval_loss": 0.8263628482818604, + "eval_runtime": 7.0923, + "eval_samples_per_second": 508.721, + "eval_steps_per_second": 8.037, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0002703311462292238, + "loss": 0.7993, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00032439737547506855, + "loss": 0.7892, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7962860310421286, + "eval_loss": 1.0035172700881958, + "eval_runtime": 6.9076, + "eval_samples_per_second": 522.32, + "eval_steps_per_second": 8.252, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003784636047209133, + "loss": 0.7847, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0004325298339667581, + "loss": 0.7907, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9004988913525499, + "eval_loss": 0.8406698107719421, + "eval_runtime": 6.9242, + "eval_samples_per_second": 521.074, + "eval_steps_per_second": 8.232, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00048659606321260285, + "loss": 0.788, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8902439024390244, + "eval_loss": 0.8500009179115295, + "eval_runtime": 6.5523, + "eval_samples_per_second": 550.643, + "eval_steps_per_second": 8.699, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0005406622924584476, + "loss": 0.7905, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0005947285217042924, + "loss": 0.7903, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8722283813747228, + "eval_loss": 0.8836174607276917, + "eval_runtime": 6.8478, + "eval_samples_per_second": 526.885, + "eval_steps_per_second": 8.324, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0006487947509501371, + "loss": 0.8044, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0007028609801959819, + "loss": 0.8005, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8386917960088692, + "eval_loss": 0.9253634214401245, + "eval_runtime": 6.6711, + "eval_samples_per_second": 540.836, + "eval_steps_per_second": 8.544, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0007569272094418266, + "loss": 0.8074, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8539356984478935, + "eval_loss": 0.9217946529388428, + "eval_runtime": 6.8305, + "eval_samples_per_second": 528.221, + "eval_steps_per_second": 8.345, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0008109934386876715, + "loss": 0.8214, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0008650596679335162, + "loss": 0.8211, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8373059866962306, + "eval_loss": 1.0037873983383179, + "eval_runtime": 6.9272, + "eval_samples_per_second": 520.846, + "eval_steps_per_second": 8.228, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.000871900198866313, + "loss": 0.8096, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.873059866962306, + "eval_loss": 0.8749555945396423, + "eval_runtime": 6.9276, + "eval_samples_per_second": 520.816, + "eval_steps_per_second": 8.228, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0008663597810925184, + "loss": 0.8142, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0008567597779112751, + "loss": 0.8129, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8395232815964523, + "eval_loss": 0.9741567373275757, + "eval_runtime": 6.6401, + "eval_samples_per_second": 543.364, + "eval_steps_per_second": 8.584, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0008431908913533095, + "loss": 0.8176, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0008257813219558784, + "loss": 0.812, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.871119733924612, + "eval_loss": 0.8878970742225647, + "eval_runtime": 6.8805, + "eval_samples_per_second": 524.377, + "eval_steps_per_second": 8.284, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0008046955575080593, + "loss": 0.7965, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8503325942350333, + "eval_loss": 0.9190165400505066, + "eval_runtime": 7.1454, + "eval_samples_per_second": 504.939, + "eval_steps_per_second": 7.977, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0007801328189495462, + "loss": 0.8128, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0007523251781062956, + "loss": 0.7996, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8924611973392461, + "eval_loss": 0.8424035906791687, + "eval_runtime": 6.9445, + "eval_samples_per_second": 519.551, + "eval_steps_per_second": 8.208, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0007215353650468892, + "loss": 0.7975, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0006880542857759862, + "loss": 0.7944, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8744456762749445, + "eval_loss": 0.8793225288391113, + "eval_runtime": 6.6692, + "eval_samples_per_second": 540.99, + "eval_steps_per_second": 8.547, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0006521982737180083, + "loss": 0.787, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8824833702882483, + "eval_loss": 0.8486706614494324, + "eval_runtime": 6.908, + "eval_samples_per_second": 522.295, + "eval_steps_per_second": 8.251, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0006143061009593818, + "loss": 0.7792, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0005747357774874944, + "loss": 0.7753, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.8869179600886918, + "eval_loss": 0.8535000085830688, + "eval_runtime": 6.9297, + "eval_samples_per_second": 520.654, + "eval_steps_per_second": 8.225, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0005338611686675588, + "loss": 0.7639, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.0004920684629158832, + "loss": 0.7647, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9018847006651884, + "eval_loss": 0.8282992839813232, + "eval_runtime": 6.784, + "eval_samples_per_second": 531.841, + "eval_steps_per_second": 8.402, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00044975252294341203, + "loss": 0.7658, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8891352549889135, + "eval_loss": 0.8540334105491638, + "eval_runtime": 6.87, + "eval_samples_per_second": 525.183, + "eval_steps_per_second": 8.297, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00040731315504344935, + "loss": 0.7538, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00036515133167179504, + "loss": 0.7442, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.8938470066518847, + "eval_loss": 0.8362773060798645, + "eval_runtime": 7.0082, + "eval_samples_per_second": 514.823, + "eval_steps_per_second": 8.133, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.000323665403008833, + "loss": 0.7353, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9016075388026608, + "eval_loss": 0.8352402448654175, + "eval_runtime": 6.9197, + "eval_samples_per_second": 521.408, + "eval_steps_per_second": 8.237, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0002832473332972056, + "loss": 0.7395, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.000244278997514626, + "loss": 0.7326, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8239282965660095, + "eval_runtime": 6.561, + "eval_samples_per_second": 549.915, + "eval_steps_per_second": 8.688, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.0002071285733713279, + "loss": 0.7338, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.0001721470627210114, + "loss": 0.7224, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8245892524719238, + "eval_runtime": 6.5794, + "eval_samples_per_second": 548.381, + "eval_steps_per_second": 8.663, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 0.00013966497525142913, + "loss": 0.7165, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.8174624443054199, + "eval_runtime": 6.9114, + "eval_samples_per_second": 522.036, + "eval_steps_per_second": 8.247, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 0.00010998920578752054, + "loss": 0.717, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 8.340013471072141e-05, + "loss": 0.711, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.8170289397239685, + "eval_runtime": 6.8654, + "eval_samples_per_second": 525.535, + "eval_steps_per_second": 8.303, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 6.014897889005363e-05, + "loss": 0.7115, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 4.0455418153727286e-05, + "loss": 0.7106, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8164230585098267, + "eval_runtime": 6.9054, + "eval_samples_per_second": 522.493, + "eval_steps_per_second": 8.254, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.450551972664773e-05, + "loss": 0.707, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9088137472283814, + "eval_loss": 0.8162174820899963, + "eval_runtime": 7.0016, + "eval_samples_per_second": 515.309, + "eval_steps_per_second": 8.141, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.244998024400373e-05, + "loss": 0.7017, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 4.402701950610671e-06, + "loss": 0.702, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9101995565410199, + "eval_loss": 0.8174305558204651, + "eval_runtime": 6.8715, + "eval_samples_per_second": 525.068, + "eval_steps_per_second": 8.295, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 4.3971653825558924e-07, + "loss": 0.7063, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.812396764755249, + "eval_runtime": 6.8762, + "eval_samples_per_second": 524.708, + "eval_steps_per_second": 8.289, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0008733775493559538, + "metric": "eval/loss", + "warmup_ratio": 0.3328703828976002 + } +} diff --git a/run-03pb2rjn/checkpoint-1260/training_args.bin b/run-03pb2rjn/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3d41af12aa149d39dfe79db1c6fde3c6cbd1ba75 --- /dev/null +++ b/run-03pb2rjn/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f790c3845918a315719ceecd0fdbf0a1a8fa12de0e64e0248d8e4265fdc5885b +size 4792 diff --git a/run-0t23cnxu/checkpoint-1232/model.safetensors b/run-0t23cnxu/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..370d89d00647cb95b85d20e99992df2a4066626f --- /dev/null +++ b/run-0t23cnxu/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cc2e91bb0a3075ace3f8c0aa7bd38cdf99697cdf1306b87b57d4d565906e666 +size 198025308 diff --git a/run-0t23cnxu/checkpoint-1232/optimizer.pt b/run-0t23cnxu/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d9e05337d76c34fd01b5701d2bd330592deeb31 --- /dev/null +++ b/run-0t23cnxu/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:554e763483561667636a2ac9be6574b6f7dd46f43bb074ac465b3434553487a3 +size 395900602 diff --git a/run-0t23cnxu/checkpoint-1232/rng_state.pth b/run-0t23cnxu/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-0t23cnxu/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-0t23cnxu/checkpoint-1232/scheduler.pt b/run-0t23cnxu/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..19782463c60202cad01559b1fee2525eb46dd6a2 --- /dev/null +++ b/run-0t23cnxu/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:417adabfc2f3fb03fb51355107a98ed1a08058242dea66c353b5cc45ab70ace4 +size 1064 diff --git a/run-0t23cnxu/checkpoint-1232/trainer_state.json b/run-0t23cnxu/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b7cfb2b0b6401f9dca37f92ae64282813695221a --- /dev/null +++ b/run-0t23cnxu/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.916019955654102, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-0t23cnxu/checkpoint-1190", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.592141778986334e-05, + "loss": 1.2233, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8896895787139689, + "eval_loss": 0.89235520362854, + "eval_runtime": 6.9506, + "eval_samples_per_second": 519.09, + "eval_steps_per_second": 8.201, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00013184283557972668, + "loss": 0.8733, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00019776425336959002, + "loss": 0.8108, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.905210643015521, + "eval_loss": 0.8281971216201782, + "eval_runtime": 7.0446, + "eval_samples_per_second": 512.163, + "eval_steps_per_second": 8.091, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00026368567115945337, + "loss": 0.8033, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9060421286031042, + "eval_loss": 0.835341215133667, + "eval_runtime": 7.2333, + "eval_samples_per_second": 498.805, + "eval_steps_per_second": 7.88, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003296070889493167, + "loss": 0.7991, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00039552850673918005, + "loss": 0.7927, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.3062638580931264, + "eval_loss": 1.9181619882583618, + "eval_runtime": 6.8913, + "eval_samples_per_second": 523.562, + "eval_steps_per_second": 8.271, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00046144992452904344, + "loss": 0.7921, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0005273713423189067, + "loss": 0.7957, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8708425720620843, + "eval_loss": 0.8894914388656616, + "eval_runtime": 6.783, + "eval_samples_per_second": 531.919, + "eval_steps_per_second": 8.403, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0005932927601087701, + "loss": 0.808, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8689024390243902, + "eval_loss": 0.8922765851020813, + "eval_runtime": 6.8307, + "eval_samples_per_second": 528.203, + "eval_steps_per_second": 8.345, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0006592141778986334, + "loss": 0.8061, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0007251355956884968, + "loss": 0.8088, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8658536585365854, + "eval_loss": 0.8844387531280518, + "eval_runtime": 6.8269, + "eval_samples_per_second": 528.496, + "eval_steps_per_second": 8.349, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0007910570134783601, + "loss": 0.8271, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0008442406306968075, + "loss": 0.8215, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.49057649667405767, + "eval_loss": 1.415905475616455, + "eval_runtime": 6.8082, + "eval_samples_per_second": 529.95, + "eval_steps_per_second": 8.372, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0008419736716673505, + "loss": 0.8314, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8508869179600886, + "eval_loss": 0.925150454044342, + "eval_runtime": 6.7965, + "eval_samples_per_second": 530.858, + "eval_steps_per_second": 8.387, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0008364493039611666, + "loss": 0.8414, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0008277103911683624, + "loss": 0.8306, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.844789356984479, + "eval_loss": 0.9613429307937622, + "eval_runtime": 6.7109, + "eval_samples_per_second": 537.632, + "eval_steps_per_second": 8.494, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0008158247385466344, + "loss": 0.8208, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8561529933481153, + "eval_loss": 0.9101646542549133, + "eval_runtime": 6.8804, + "eval_samples_per_second": 524.39, + "eval_steps_per_second": 8.284, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0008008845669200157, + "loss": 0.8112, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0007830057971371508, + "loss": 0.8103, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.866130820399113, + "eval_loss": 0.8902859687805176, + "eval_runtime": 6.5342, + "eval_samples_per_second": 552.175, + "eval_steps_per_second": 8.723, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0007623271506409885, + "loss": 0.8114, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0007390090731285776, + "loss": 0.8066, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.883869179600887, + "eval_loss": 0.8509737849235535, + "eval_runtime": 6.859, + "eval_samples_per_second": 526.023, + "eval_steps_per_second": 8.31, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0007132324896522899, + "loss": 0.789, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8181818181818182, + "eval_loss": 0.9610012173652649, + "eval_runtime": 6.7942, + "eval_samples_per_second": 531.04, + "eval_steps_per_second": 8.389, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0006851974008216503, + "loss": 0.8046, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0006551213309978434, + "loss": 0.7871, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8794345898004434, + "eval_loss": 0.8511735796928406, + "eval_runtime": 7.0266, + "eval_samples_per_second": 513.474, + "eval_steps_per_second": 8.112, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.000623237640521366, + "loss": 0.7895, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0005897937150682545, + "loss": 0.7796, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8941241685144125, + "eval_loss": 0.8450374007225037, + "eval_runtime": 6.4133, + "eval_samples_per_second": 562.585, + "eval_steps_per_second": 8.888, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0005550490461836812, + "loss": 0.7766, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8919068736141907, + "eval_loss": 0.84095698595047, + "eval_runtime": 6.6733, + "eval_samples_per_second": 540.666, + "eval_steps_per_second": 8.542, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0005192732178860646, + "loss": 0.7699, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00048274381496364333, + "loss": 0.7682, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8209763169288635, + "eval_runtime": 6.9292, + "eval_samples_per_second": 520.693, + "eval_steps_per_second": 8.226, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00044574426919304425, + "loss": 0.7545, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00040856166019104657, + "loss": 0.7493, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.8999445676274944, + "eval_loss": 0.8305134773254395, + "eval_runtime": 6.4165, + "eval_samples_per_second": 562.302, + "eval_steps_per_second": 8.883, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00037148448796274085, + "loss": 0.7647, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8957871396895787, + "eval_loss": 0.832939863204956, + "eval_runtime": 7.1704, + "eval_samples_per_second": 503.181, + "eval_steps_per_second": 7.949, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0003348004344288875, + "loss": 0.7469, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00029879413130079794, + "loss": 0.7394, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9024390243902439, + "eval_loss": 0.8302128911018372, + "eval_runtime": 6.5838, + "eval_samples_per_second": 548.015, + "eval_steps_per_second": 8.658, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.0002637449516217992, + "loss": 0.7289, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9068736141906873, + "eval_loss": 0.8258054852485657, + "eval_runtime": 7.2067, + "eval_samples_per_second": 500.644, + "eval_steps_per_second": 7.909, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0002299248421107255, + "loss": 0.7366, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00019759621312629024, + "loss": 0.7284, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9077050997782705, + "eval_loss": 0.8189506530761719, + "eval_runtime": 6.778, + "eval_samples_per_second": 532.311, + "eval_steps_per_second": 8.41, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.0001670099026241101, + "loss": 0.7256, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.00013840322990405166, + "loss": 0.7182, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.8193372488021851, + "eval_runtime": 6.6489, + "eval_samples_per_second": 542.643, + "eval_steps_per_second": 8.573, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 0.00011199815424887838, + "loss": 0.7122, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8143371343612671, + "eval_runtime": 6.823, + "eval_samples_per_second": 528.798, + "eval_steps_per_second": 8.354, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 8.799955274132829e-05, + "loss": 0.7151, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 6.659363062204812e-05, + "loss": 0.7075, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.8188567757606506, + "eval_runtime": 6.7351, + "eval_samples_per_second": 535.703, + "eval_steps_per_second": 8.463, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 4.794647652242038e-05, + "loss": 0.7083, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.220277378223892e-05, + "loss": 0.7105, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.81794673204422, + "eval_runtime": 7.0537, + "eval_samples_per_second": 511.506, + "eval_steps_per_second": 8.081, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.9484677851123602e-05, + "loss": 0.7061, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.809096097946167, + "eval_runtime": 6.7896, + "eval_samples_per_second": 531.403, + "eval_steps_per_second": 8.395, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 9.890868483920935e-06, + "loss": 0.7012, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 3.4957840841091803e-06, + "loss": 0.7019, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8142038583755493, + "eval_runtime": 6.8663, + "eval_samples_per_second": 525.467, + "eval_steps_per_second": 8.301, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0008443012355394036, + "metric": "eval/loss", + "warmup_ratio": 0.26422569175932836 + } +} diff --git a/run-0t23cnxu/checkpoint-1232/training_args.bin b/run-0t23cnxu/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0f87ebd80099253396cad2a4da9937461a1d3c52 --- /dev/null +++ b/run-0t23cnxu/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab04088d6a5b249c07af2275e9e3ba21c8e3dcf2db2dbfdad8f76bcc46b0372a +size 4792 diff --git a/run-0t23cnxu/checkpoint-1260/model.safetensors b/run-0t23cnxu/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76b20f1504b6800637d0a627b76e293bc920f504 --- /dev/null +++ b/run-0t23cnxu/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efb30864fcb2c1e8ee3a05ebc7b87b4da398661551363e04fcdacff154878c5b +size 198025308 diff --git a/run-0t23cnxu/checkpoint-1260/optimizer.pt b/run-0t23cnxu/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d499424f3c491c744a2032efc470deb15ea856c0 --- /dev/null +++ b/run-0t23cnxu/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:102eeec9c08aa585a37266e4dd19307a29a9a19815ce690843f938361252930c +size 395900602 diff --git a/run-0t23cnxu/checkpoint-1260/rng_state.pth b/run-0t23cnxu/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-0t23cnxu/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-0t23cnxu/checkpoint-1260/scheduler.pt b/run-0t23cnxu/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a7104479b44c949642a1fdade177f1731ba8814f --- /dev/null +++ b/run-0t23cnxu/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e04296fca60c05e6440e7b67de679bf2c4bee6f76163acffa199c3cb36d8a67 +size 1064 diff --git a/run-0t23cnxu/checkpoint-1260/trainer_state.json b/run-0t23cnxu/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8e9e9c3b3c47eb68f3da8610fc278cd0f5297b87 --- /dev/null +++ b/run-0t23cnxu/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9171286031042128, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-0t23cnxu/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.592141778986334e-05, + "loss": 1.2233, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8896895787139689, + "eval_loss": 0.89235520362854, + "eval_runtime": 6.9506, + "eval_samples_per_second": 519.09, + "eval_steps_per_second": 8.201, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00013184283557972668, + "loss": 0.8733, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00019776425336959002, + "loss": 0.8108, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.905210643015521, + "eval_loss": 0.8281971216201782, + "eval_runtime": 7.0446, + "eval_samples_per_second": 512.163, + "eval_steps_per_second": 8.091, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00026368567115945337, + "loss": 0.8033, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9060421286031042, + "eval_loss": 0.835341215133667, + "eval_runtime": 7.2333, + "eval_samples_per_second": 498.805, + "eval_steps_per_second": 7.88, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003296070889493167, + "loss": 0.7991, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00039552850673918005, + "loss": 0.7927, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.3062638580931264, + "eval_loss": 1.9181619882583618, + "eval_runtime": 6.8913, + "eval_samples_per_second": 523.562, + "eval_steps_per_second": 8.271, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00046144992452904344, + "loss": 0.7921, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0005273713423189067, + "loss": 0.7957, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8708425720620843, + "eval_loss": 0.8894914388656616, + "eval_runtime": 6.783, + "eval_samples_per_second": 531.919, + "eval_steps_per_second": 8.403, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0005932927601087701, + "loss": 0.808, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8689024390243902, + "eval_loss": 0.8922765851020813, + "eval_runtime": 6.8307, + "eval_samples_per_second": 528.203, + "eval_steps_per_second": 8.345, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0006592141778986334, + "loss": 0.8061, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0007251355956884968, + "loss": 0.8088, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8658536585365854, + "eval_loss": 0.8844387531280518, + "eval_runtime": 6.8269, + "eval_samples_per_second": 528.496, + "eval_steps_per_second": 8.349, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0007910570134783601, + "loss": 0.8271, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0008442406306968075, + "loss": 0.8215, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.49057649667405767, + "eval_loss": 1.415905475616455, + "eval_runtime": 6.8082, + "eval_samples_per_second": 529.95, + "eval_steps_per_second": 8.372, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0008419736716673505, + "loss": 0.8314, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8508869179600886, + "eval_loss": 0.925150454044342, + "eval_runtime": 6.7965, + "eval_samples_per_second": 530.858, + "eval_steps_per_second": 8.387, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0008364493039611666, + "loss": 0.8414, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0008277103911683624, + "loss": 0.8306, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.844789356984479, + "eval_loss": 0.9613429307937622, + "eval_runtime": 6.7109, + "eval_samples_per_second": 537.632, + "eval_steps_per_second": 8.494, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0008158247385466344, + "loss": 0.8208, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8561529933481153, + "eval_loss": 0.9101646542549133, + "eval_runtime": 6.8804, + "eval_samples_per_second": 524.39, + "eval_steps_per_second": 8.284, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0008008845669200157, + "loss": 0.8112, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0007830057971371508, + "loss": 0.8103, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.866130820399113, + "eval_loss": 0.8902859687805176, + "eval_runtime": 6.5342, + "eval_samples_per_second": 552.175, + "eval_steps_per_second": 8.723, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0007623271506409885, + "loss": 0.8114, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0007390090731285776, + "loss": 0.8066, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.883869179600887, + "eval_loss": 0.8509737849235535, + "eval_runtime": 6.859, + "eval_samples_per_second": 526.023, + "eval_steps_per_second": 8.31, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0007132324896522899, + "loss": 0.789, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8181818181818182, + "eval_loss": 0.9610012173652649, + "eval_runtime": 6.7942, + "eval_samples_per_second": 531.04, + "eval_steps_per_second": 8.389, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0006851974008216503, + "loss": 0.8046, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0006551213309978434, + "loss": 0.7871, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8794345898004434, + "eval_loss": 0.8511735796928406, + "eval_runtime": 7.0266, + "eval_samples_per_second": 513.474, + "eval_steps_per_second": 8.112, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.000623237640521366, + "loss": 0.7895, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0005897937150682545, + "loss": 0.7796, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8941241685144125, + "eval_loss": 0.8450374007225037, + "eval_runtime": 6.4133, + "eval_samples_per_second": 562.585, + "eval_steps_per_second": 8.888, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0005550490461836812, + "loss": 0.7766, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8919068736141907, + "eval_loss": 0.84095698595047, + "eval_runtime": 6.6733, + "eval_samples_per_second": 540.666, + "eval_steps_per_second": 8.542, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0005192732178860646, + "loss": 0.7699, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00048274381496364333, + "loss": 0.7682, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8209763169288635, + "eval_runtime": 6.9292, + "eval_samples_per_second": 520.693, + "eval_steps_per_second": 8.226, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00044574426919304425, + "loss": 0.7545, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00040856166019104657, + "loss": 0.7493, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.8999445676274944, + "eval_loss": 0.8305134773254395, + "eval_runtime": 6.4165, + "eval_samples_per_second": 562.302, + "eval_steps_per_second": 8.883, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00037148448796274085, + "loss": 0.7647, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8957871396895787, + "eval_loss": 0.832939863204956, + "eval_runtime": 7.1704, + "eval_samples_per_second": 503.181, + "eval_steps_per_second": 7.949, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0003348004344288875, + "loss": 0.7469, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00029879413130079794, + "loss": 0.7394, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9024390243902439, + "eval_loss": 0.8302128911018372, + "eval_runtime": 6.5838, + "eval_samples_per_second": 548.015, + "eval_steps_per_second": 8.658, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.0002637449516217992, + "loss": 0.7289, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9068736141906873, + "eval_loss": 0.8258054852485657, + "eval_runtime": 7.2067, + "eval_samples_per_second": 500.644, + "eval_steps_per_second": 7.909, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0002299248421107255, + "loss": 0.7366, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00019759621312629024, + "loss": 0.7284, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9077050997782705, + "eval_loss": 0.8189506530761719, + "eval_runtime": 6.778, + "eval_samples_per_second": 532.311, + "eval_steps_per_second": 8.41, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.0001670099026241101, + "loss": 0.7256, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.00013840322990405166, + "loss": 0.7182, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.8193372488021851, + "eval_runtime": 6.6489, + "eval_samples_per_second": 542.643, + "eval_steps_per_second": 8.573, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 0.00011199815424887838, + "loss": 0.7122, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8143371343612671, + "eval_runtime": 6.823, + "eval_samples_per_second": 528.798, + "eval_steps_per_second": 8.354, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 8.799955274132829e-05, + "loss": 0.7151, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 6.659363062204812e-05, + "loss": 0.7075, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.8188567757606506, + "eval_runtime": 6.7351, + "eval_samples_per_second": 535.703, + "eval_steps_per_second": 8.463, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 4.794647652242038e-05, + "loss": 0.7083, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.220277378223892e-05, + "loss": 0.7105, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.81794673204422, + "eval_runtime": 7.0537, + "eval_samples_per_second": 511.506, + "eval_steps_per_second": 8.081, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.9484677851123602e-05, + "loss": 0.7061, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.809096097946167, + "eval_runtime": 6.7896, + "eval_samples_per_second": 531.403, + "eval_steps_per_second": 8.395, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 9.890868483920935e-06, + "loss": 0.7012, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 3.4957840841091803e-06, + "loss": 0.7019, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8142038583755493, + "eval_runtime": 6.8663, + "eval_samples_per_second": 525.467, + "eval_steps_per_second": 8.301, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 3.4904413594245166e-07, + "loss": 0.7049, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8099156022071838, + "eval_runtime": 6.6948, + "eval_samples_per_second": 538.93, + "eval_steps_per_second": 8.514, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0008443012355394036, + "metric": "eval/loss", + "warmup_ratio": 0.26422569175932836 + } +} diff --git a/run-0t23cnxu/checkpoint-1260/training_args.bin b/run-0t23cnxu/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0f87ebd80099253396cad2a4da9937461a1d3c52 --- /dev/null +++ b/run-0t23cnxu/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab04088d6a5b249c07af2275e9e3ba21c8e3dcf2db2dbfdad8f76bcc46b0372a +size 4792 diff --git a/run-22pm9fz7/checkpoint-573/model.safetensors b/run-22pm9fz7/checkpoint-573/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..377eb65db883dd2dda908a0d99ad19aa8db45d94 --- /dev/null +++ b/run-22pm9fz7/checkpoint-573/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:218a8aeed10478fa490b97ea30aebda7ac73a34263790c45e83a5d756b31772d +size 198025308 diff --git a/run-22pm9fz7/checkpoint-573/optimizer.pt b/run-22pm9fz7/checkpoint-573/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb8746435eca84eda88b2382937cbaf68760d383 --- /dev/null +++ b/run-22pm9fz7/checkpoint-573/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7775b298ddb9c3cb25f0605b6806a8ec3029e0c394c7eeb9e2e8f402b6abc4c +size 395900602 diff --git a/run-22pm9fz7/checkpoint-573/rng_state.pth b/run-22pm9fz7/checkpoint-573/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e8713f9caaf617efce0d3935618a93ad2f5f391 --- /dev/null +++ b/run-22pm9fz7/checkpoint-573/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9102bb312b12c2313ea7585eb813beef5c548592778aaea4ab0516e14ecd38e5 +size 14244 diff --git a/run-22pm9fz7/checkpoint-573/scheduler.pt b/run-22pm9fz7/checkpoint-573/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dba347168d04e47275256b1e21812aafeec387f7 --- /dev/null +++ b/run-22pm9fz7/checkpoint-573/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d190657aad44956070319e4cbdc740e546c0b283321e639ed75a4df57212c478 +size 1064 diff --git a/run-22pm9fz7/checkpoint-573/trainer_state.json b/run-22pm9fz7/checkpoint-573/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..46d8c59a57018575a386f08ea613dd96c280f644 --- /dev/null +++ b/run-22pm9fz7/checkpoint-573/trainer_state.json @@ -0,0 +1,616 @@ +{ + "best_metric": 0.9190093173009881, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-22pm9fz7/checkpoint-573", + "epoch": 26.96470588235294, + "eval_steps": 500, + "global_step": 573, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.2219843366163651e-05, + "loss": 1.486, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8276053215077606, + "eval_f1": 0.7506333806665745, + "eval_loss": 1.1113407611846924, + "eval_precision": 0.6867607734554959, + "eval_recall": 0.8276053215077606, + "eval_runtime": 8.3733, + "eval_samples_per_second": 430.891, + "eval_steps_per_second": 3.463, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 2.4439686732327303e-05, + "loss": 1.1919, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 3.665953009849096e-05, + "loss": 0.9574, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8592017738359202, + "eval_f1": 0.8158688229267518, + "eval_loss": 0.9284856915473938, + "eval_precision": 0.8413284596404325, + "eval_recall": 0.8592017738359202, + "eval_runtime": 8.4795, + "eval_samples_per_second": 425.497, + "eval_steps_per_second": 3.42, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 4.8879373464654605e-05, + "loss": 0.8789, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8968957871396895, + "eval_f1": 0.887994515834578, + "eval_loss": 0.8563220500946045, + "eval_precision": 0.888627325917696, + "eval_recall": 0.8968957871396895, + "eval_runtime": 8.648, + "eval_samples_per_second": 417.204, + "eval_steps_per_second": 3.353, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 6.109921683081827e-05, + "loss": 0.8471, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 7.331906019698191e-05, + "loss": 0.8107, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9058964979973173, + "eval_loss": 0.8172363638877869, + "eval_precision": 0.9027154396135877, + "eval_recall": 0.9121396895787139, + "eval_runtime": 8.0085, + "eval_samples_per_second": 450.52, + "eval_steps_per_second": 3.621, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 8.553890356314556e-05, + "loss": 0.8015, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 9.775874692930921e-05, + "loss": 0.7861, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8849778270509978, + "eval_f1": 0.8865037130264197, + "eval_loss": 0.8436875939369202, + "eval_precision": 0.8932393306490872, + "eval_recall": 0.8849778270509978, + "eval_runtime": 8.5752, + "eval_samples_per_second": 420.749, + "eval_steps_per_second": 3.382, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00010997859029547287, + "loss": 0.7684, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9028803793279178, + "eval_loss": 0.8081041574478149, + "eval_precision": 0.903857437506207, + "eval_recall": 0.9124168514412417, + "eval_runtime": 8.3423, + "eval_samples_per_second": 432.495, + "eval_steps_per_second": 3.476, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00011842007545068455, + "loss": 0.7711, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00011810630992573294, + "loss": 0.7634, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9067032873213865, + "eval_loss": 0.8178200125694275, + "eval_precision": 0.9055547260013138, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.0759, + "eval_samples_per_second": 446.762, + "eval_steps_per_second": 3.591, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00011740608163483467, + "loss": 0.7565, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00011632398602087223, + "loss": 0.7531, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8070953436807096, + "eval_f1": 0.8307447955720016, + "eval_loss": 0.9529986381530762, + "eval_precision": 0.8811216651383346, + "eval_recall": 0.8070953436807096, + "eval_runtime": 8.137, + "eval_samples_per_second": 443.407, + "eval_steps_per_second": 3.564, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00011486712463722754, + "loss": 0.746, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9088707382436096, + "eval_loss": 0.8120781779289246, + "eval_precision": 0.9060999886720389, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.4467, + "eval_samples_per_second": 427.148, + "eval_steps_per_second": 3.433, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0001130450585418631, + "loss": 0.7441, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00011086974555022023, + "loss": 0.7366, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.8990952855541333, + "eval_loss": 0.824906587600708, + "eval_precision": 0.9061946214416738, + "eval_recall": 0.8991130820399114, + "eval_runtime": 8.27, + "eval_samples_per_second": 436.275, + "eval_steps_per_second": 3.507, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00010835546175873, + "loss": 0.7323, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9002217294900222, + "eval_f1": 0.9012399347617073, + "eval_loss": 0.8290796279907227, + "eval_precision": 0.9052326255514389, + "eval_recall": 0.9002217294900222, + "eval_runtime": 8.3024, + "eval_samples_per_second": 434.573, + "eval_steps_per_second": 3.493, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00010551870785396043, + "loss": 0.7313, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00010237810082227219, + "loss": 0.725, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9014249052263176, + "eval_loss": 0.8157982230186462, + "eval_precision": 0.9081945289870674, + "eval_recall": 0.9121396895787139, + "eval_runtime": 8.2951, + "eval_samples_per_second": 434.956, + "eval_steps_per_second": 3.496, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 9.895425177066865e-05, + "loss": 0.7242, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 9.526963066067437e-05, + "loss": 0.7188, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9146196519587654, + "eval_loss": 0.8068662285804749, + "eval_precision": 0.9120081137034548, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.2252, + "eval_samples_per_second": 438.654, + "eval_steps_per_second": 3.526, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 9.134841884296409e-05, + "loss": 0.7204, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9092891599955425, + "eval_loss": 0.8142066597938538, + "eval_precision": 0.9095107691249883, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.1951, + "eval_samples_per_second": 440.263, + "eval_steps_per_second": 3.539, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 8.721635036052442e-05, + "loss": 0.7152, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 8.290054306184049e-05, + "loss": 0.7135, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9103487881551395, + "eval_loss": 0.8043306469917297, + "eval_precision": 0.9097246610377273, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.4787, + "eval_samples_per_second": 425.535, + "eval_steps_per_second": 3.42, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 7.842932063247419e-05, + "loss": 0.7131, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 7.383202671300115e-05, + "loss": 0.7123, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9125587073196101, + "eval_loss": 0.80075603723526, + "eval_precision": 0.9124672143244127, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.2538, + "eval_samples_per_second": 437.13, + "eval_steps_per_second": 3.514, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 6.913883232320964e-05, + "loss": 0.7087, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.9037420660128592, + "eval_loss": 0.82823246717453, + "eval_precision": 0.9070644554543488, + "eval_recall": 0.9024390243902439, + "eval_runtime": 7.9823, + "eval_samples_per_second": 452.002, + "eval_steps_per_second": 3.633, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 6.438053785639281e-05, + "loss": 0.708, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 5.958837094320274e-05, + "loss": 0.7058, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9221175166297118, + "eval_f1": 0.918139928004074, + "eval_loss": 0.8006211519241333, + "eval_precision": 0.9155745392875521, + "eval_recall": 0.9221175166297118, + "eval_runtime": 8.0911, + "eval_samples_per_second": 445.922, + "eval_steps_per_second": 3.584, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 5.479378151164032e-05, + "loss": 0.7061, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 5.0028235388156865e-05, + "loss": 0.7007, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.911467714185253, + "eval_loss": 0.8088358640670776, + "eval_precision": 0.9095502759728672, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.3048, + "eval_samples_per_second": 434.446, + "eval_steps_per_second": 3.492, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 4.532300779441795e-05, + "loss": 0.7071, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9102457116550721, + "eval_loss": 0.8067628741264343, + "eval_precision": 0.9095707265020937, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.2606, + "eval_samples_per_second": 436.772, + "eval_steps_per_second": 3.511, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.070897809496389e-05, + "loss": 0.699, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.621642714279302e-05, + "loss": 0.6963, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9110921381198841, + "eval_loss": 0.8131796717643738, + "eval_precision": 0.9102718845983578, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.5042, + "eval_samples_per_second": 424.259, + "eval_steps_per_second": 3.41, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.18748385528431e-05, + "loss": 0.6993, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9118107776438924, + "eval_loss": 0.8081905841827393, + "eval_precision": 0.9099277181845643, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.4643, + "eval_samples_per_second": 426.259, + "eval_steps_per_second": 3.426, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.771270520756931e-05, + "loss": 0.699, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.3757342264479745e-05, + "loss": 0.695, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9093429388548893, + "eval_loss": 0.8111734390258789, + "eval_precision": 0.9074999707107355, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.9698, + "eval_samples_per_second": 452.711, + "eval_steps_per_second": 3.639, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.0034707892818282e-05, + "loss": 0.696, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.6569232915860257e-05, + "loss": 0.6953, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.916865936054669, + "eval_loss": 0.8013141751289368, + "eval_precision": 0.915740899601062, + "eval_recall": 0.91990022172949, + "eval_runtime": 8.377, + "eval_samples_per_second": 430.703, + "eval_steps_per_second": 3.462, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.3383660476840981e-05, + "loss": 0.6953, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.914589050094206, + "eval_loss": 0.8078990578651428, + "eval_precision": 0.9152464196196067, + "eval_recall": 0.9207317073170732, + "eval_runtime": 8.115, + "eval_samples_per_second": 444.608, + "eval_steps_per_second": 3.574, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.0498896780753623e-05, + "loss": 0.6945, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 7.933873891565259e-06, + "loss": 0.6922, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9226718403547672, + "eval_f1": 0.9178570407241106, + "eval_loss": 0.7991681098937988, + "eval_precision": 0.9160698180928497, + "eval_recall": 0.9226718403547672, + "eval_runtime": 8.0279, + "eval_samples_per_second": 449.434, + "eval_steps_per_second": 3.612, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 5.705425485282367e-06, + "loss": 0.6915, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.828176374270756e-06, + "loss": 0.6891, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9190093173009881, + "eval_loss": 0.8051531910896301, + "eval_precision": 0.9170832022797123, + "eval_recall": 0.9223946784922394, + "eval_runtime": 8.1305, + "eval_samples_per_second": 443.759, + "eval_steps_per_second": 3.567, + "step": 573 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.04380727559247777, + "learning_rate": 0.00011843848185666308, + "metric": "eval/loss", + "weight_decay": 0.04759846929286333 + } +} diff --git a/run-22pm9fz7/checkpoint-573/training_args.bin b/run-22pm9fz7/checkpoint-573/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b3966c5907800ad4fb7e9f94a3fa3d26bd1bee02 --- /dev/null +++ b/run-22pm9fz7/checkpoint-573/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd44ac72f55da97bd4c201bcd3d83cc4a8480c26354d61666839e3c3964bca85 +size 4792 diff --git a/run-22pm9fz7/checkpoint-630/model.safetensors b/run-22pm9fz7/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6fd5316ac491f9347c53e99fc32c83ca6f59755a --- /dev/null +++ b/run-22pm9fz7/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:251b9dcae8e64bde930b0291bce3a3e77bdd4bd952300070a200d94c5aa5c255 +size 198025308 diff --git a/run-22pm9fz7/checkpoint-630/optimizer.pt b/run-22pm9fz7/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..62b14d62e9f244c82da07ac6e995867eb208fba0 --- /dev/null +++ b/run-22pm9fz7/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7719775f550654b470db266cb6c4656645d42c96525e65c36f922760fc1f0481 +size 395900602 diff --git a/run-22pm9fz7/checkpoint-630/rng_state.pth b/run-22pm9fz7/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-22pm9fz7/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-22pm9fz7/checkpoint-630/scheduler.pt b/run-22pm9fz7/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..80ddce41994ec4422e26824526d37c5d2b35d339 --- /dev/null +++ b/run-22pm9fz7/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b31486bf2978f7c303d9a41141bea2194c0b81eed86a868b8823cbc64031b3f4 +size 1064 diff --git a/run-22pm9fz7/checkpoint-630/trainer_state.json b/run-22pm9fz7/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..20db28db0acaa1df95142dee0686d38113603aac --- /dev/null +++ b/run-22pm9fz7/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9190093173009881, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-22pm9fz7/checkpoint-573", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.2219843366163651e-05, + "loss": 1.486, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8276053215077606, + "eval_f1": 0.7506333806665745, + "eval_loss": 1.1113407611846924, + "eval_precision": 0.6867607734554959, + "eval_recall": 0.8276053215077606, + "eval_runtime": 8.3733, + "eval_samples_per_second": 430.891, + "eval_steps_per_second": 3.463, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 2.4439686732327303e-05, + "loss": 1.1919, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 3.665953009849096e-05, + "loss": 0.9574, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8592017738359202, + "eval_f1": 0.8158688229267518, + "eval_loss": 0.9284856915473938, + "eval_precision": 0.8413284596404325, + "eval_recall": 0.8592017738359202, + "eval_runtime": 8.4795, + "eval_samples_per_second": 425.497, + "eval_steps_per_second": 3.42, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 4.8879373464654605e-05, + "loss": 0.8789, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8968957871396895, + "eval_f1": 0.887994515834578, + "eval_loss": 0.8563220500946045, + "eval_precision": 0.888627325917696, + "eval_recall": 0.8968957871396895, + "eval_runtime": 8.648, + "eval_samples_per_second": 417.204, + "eval_steps_per_second": 3.353, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 6.109921683081827e-05, + "loss": 0.8471, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 7.331906019698191e-05, + "loss": 0.8107, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9058964979973173, + "eval_loss": 0.8172363638877869, + "eval_precision": 0.9027154396135877, + "eval_recall": 0.9121396895787139, + "eval_runtime": 8.0085, + "eval_samples_per_second": 450.52, + "eval_steps_per_second": 3.621, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 8.553890356314556e-05, + "loss": 0.8015, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 9.775874692930921e-05, + "loss": 0.7861, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8849778270509978, + "eval_f1": 0.8865037130264197, + "eval_loss": 0.8436875939369202, + "eval_precision": 0.8932393306490872, + "eval_recall": 0.8849778270509978, + "eval_runtime": 8.5752, + "eval_samples_per_second": 420.749, + "eval_steps_per_second": 3.382, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00010997859029547287, + "loss": 0.7684, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9028803793279178, + "eval_loss": 0.8081041574478149, + "eval_precision": 0.903857437506207, + "eval_recall": 0.9124168514412417, + "eval_runtime": 8.3423, + "eval_samples_per_second": 432.495, + "eval_steps_per_second": 3.476, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00011842007545068455, + "loss": 0.7711, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00011810630992573294, + "loss": 0.7634, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9067032873213865, + "eval_loss": 0.8178200125694275, + "eval_precision": 0.9055547260013138, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.0759, + "eval_samples_per_second": 446.762, + "eval_steps_per_second": 3.591, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00011740608163483467, + "loss": 0.7565, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00011632398602087223, + "loss": 0.7531, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8070953436807096, + "eval_f1": 0.8307447955720016, + "eval_loss": 0.9529986381530762, + "eval_precision": 0.8811216651383346, + "eval_recall": 0.8070953436807096, + "eval_runtime": 8.137, + "eval_samples_per_second": 443.407, + "eval_steps_per_second": 3.564, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00011486712463722754, + "loss": 0.746, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9088707382436096, + "eval_loss": 0.8120781779289246, + "eval_precision": 0.9060999886720389, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.4467, + "eval_samples_per_second": 427.148, + "eval_steps_per_second": 3.433, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0001130450585418631, + "loss": 0.7441, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00011086974555022023, + "loss": 0.7366, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.8990952855541333, + "eval_loss": 0.824906587600708, + "eval_precision": 0.9061946214416738, + "eval_recall": 0.8991130820399114, + "eval_runtime": 8.27, + "eval_samples_per_second": 436.275, + "eval_steps_per_second": 3.507, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00010835546175873, + "loss": 0.7323, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9002217294900222, + "eval_f1": 0.9012399347617073, + "eval_loss": 0.8290796279907227, + "eval_precision": 0.9052326255514389, + "eval_recall": 0.9002217294900222, + "eval_runtime": 8.3024, + "eval_samples_per_second": 434.573, + "eval_steps_per_second": 3.493, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00010551870785396043, + "loss": 0.7313, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00010237810082227219, + "loss": 0.725, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9014249052263176, + "eval_loss": 0.8157982230186462, + "eval_precision": 0.9081945289870674, + "eval_recall": 0.9121396895787139, + "eval_runtime": 8.2951, + "eval_samples_per_second": 434.956, + "eval_steps_per_second": 3.496, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 9.895425177066865e-05, + "loss": 0.7242, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 9.526963066067437e-05, + "loss": 0.7188, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9146196519587654, + "eval_loss": 0.8068662285804749, + "eval_precision": 0.9120081137034548, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.2252, + "eval_samples_per_second": 438.654, + "eval_steps_per_second": 3.526, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 9.134841884296409e-05, + "loss": 0.7204, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9092891599955425, + "eval_loss": 0.8142066597938538, + "eval_precision": 0.9095107691249883, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.1951, + "eval_samples_per_second": 440.263, + "eval_steps_per_second": 3.539, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 8.721635036052442e-05, + "loss": 0.7152, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 8.290054306184049e-05, + "loss": 0.7135, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9103487881551395, + "eval_loss": 0.8043306469917297, + "eval_precision": 0.9097246610377273, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.4787, + "eval_samples_per_second": 425.535, + "eval_steps_per_second": 3.42, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 7.842932063247419e-05, + "loss": 0.7131, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 7.383202671300115e-05, + "loss": 0.7123, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9125587073196101, + "eval_loss": 0.80075603723526, + "eval_precision": 0.9124672143244127, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.2538, + "eval_samples_per_second": 437.13, + "eval_steps_per_second": 3.514, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 6.913883232320964e-05, + "loss": 0.7087, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.9037420660128592, + "eval_loss": 0.82823246717453, + "eval_precision": 0.9070644554543488, + "eval_recall": 0.9024390243902439, + "eval_runtime": 7.9823, + "eval_samples_per_second": 452.002, + "eval_steps_per_second": 3.633, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 6.438053785639281e-05, + "loss": 0.708, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 5.958837094320274e-05, + "loss": 0.7058, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9221175166297118, + "eval_f1": 0.918139928004074, + "eval_loss": 0.8006211519241333, + "eval_precision": 0.9155745392875521, + "eval_recall": 0.9221175166297118, + "eval_runtime": 8.0911, + "eval_samples_per_second": 445.922, + "eval_steps_per_second": 3.584, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 5.479378151164032e-05, + "loss": 0.7061, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 5.0028235388156865e-05, + "loss": 0.7007, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.911467714185253, + "eval_loss": 0.8088358640670776, + "eval_precision": 0.9095502759728672, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.3048, + "eval_samples_per_second": 434.446, + "eval_steps_per_second": 3.492, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 4.532300779441795e-05, + "loss": 0.7071, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9102457116550721, + "eval_loss": 0.8067628741264343, + "eval_precision": 0.9095707265020937, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.2606, + "eval_samples_per_second": 436.772, + "eval_steps_per_second": 3.511, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.070897809496389e-05, + "loss": 0.699, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.621642714279302e-05, + "loss": 0.6963, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9110921381198841, + "eval_loss": 0.8131796717643738, + "eval_precision": 0.9102718845983578, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.5042, + "eval_samples_per_second": 424.259, + "eval_steps_per_second": 3.41, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.18748385528431e-05, + "loss": 0.6993, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9118107776438924, + "eval_loss": 0.8081905841827393, + "eval_precision": 0.9099277181845643, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.4643, + "eval_samples_per_second": 426.259, + "eval_steps_per_second": 3.426, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.771270520756931e-05, + "loss": 0.699, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.3757342264479745e-05, + "loss": 0.695, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9093429388548893, + "eval_loss": 0.8111734390258789, + "eval_precision": 0.9074999707107355, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.9698, + "eval_samples_per_second": 452.711, + "eval_steps_per_second": 3.639, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.0034707892818282e-05, + "loss": 0.696, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.6569232915860257e-05, + "loss": 0.6953, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.916865936054669, + "eval_loss": 0.8013141751289368, + "eval_precision": 0.915740899601062, + "eval_recall": 0.91990022172949, + "eval_runtime": 8.377, + "eval_samples_per_second": 430.703, + "eval_steps_per_second": 3.462, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.3383660476840981e-05, + "loss": 0.6953, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.914589050094206, + "eval_loss": 0.8078990578651428, + "eval_precision": 0.9152464196196067, + "eval_recall": 0.9207317073170732, + "eval_runtime": 8.115, + "eval_samples_per_second": 444.608, + "eval_steps_per_second": 3.574, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.0498896780753623e-05, + "loss": 0.6945, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 7.933873891565259e-06, + "loss": 0.6922, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9226718403547672, + "eval_f1": 0.9178570407241106, + "eval_loss": 0.7991681098937988, + "eval_precision": 0.9160698180928497, + "eval_recall": 0.9226718403547672, + "eval_runtime": 8.0279, + "eval_samples_per_second": 449.434, + "eval_steps_per_second": 3.612, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 5.705425485282367e-06, + "loss": 0.6915, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.828176374270756e-06, + "loss": 0.6891, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9190093173009881, + "eval_loss": 0.8051531910896301, + "eval_precision": 0.9170832022797123, + "eval_recall": 0.9223946784922394, + "eval_runtime": 8.1305, + "eval_samples_per_second": 443.759, + "eval_steps_per_second": 3.567, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.3144465278575693e-06, + "loss": 0.6926, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9109826412632926, + "eval_loss": 0.8063508868217468, + "eval_precision": 0.9094636475940526, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.2034, + "eval_samples_per_second": 439.816, + "eval_steps_per_second": 3.535, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.1741702191065044e-06, + "loss": 0.6939, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 4.1483082838826053e-07, + "loss": 0.6932, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9226718403547672, + "eval_f1": 0.9176131090408187, + "eval_loss": 0.8002130389213562, + "eval_precision": 0.9141152774640595, + "eval_recall": 0.9226718403547672, + "eval_runtime": 8.3732, + "eval_samples_per_second": 430.898, + "eval_steps_per_second": 3.463, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 4.14117316158514e-08, + "loss": 0.695, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9182832502545885, + "eval_loss": 0.8041395545005798, + "eval_precision": 0.9178198224542571, + "eval_recall": 0.9223946784922394, + "eval_runtime": 8.142, + "eval_samples_per_second": 443.134, + "eval_steps_per_second": 3.562, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.04380727559247777, + "learning_rate": 0.00011843848185666308, + "metric": "eval/loss", + "weight_decay": 0.04759846929286333 + } +} diff --git a/run-22pm9fz7/checkpoint-630/training_args.bin b/run-22pm9fz7/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b3966c5907800ad4fb7e9f94a3fa3d26bd1bee02 --- /dev/null +++ b/run-22pm9fz7/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd44ac72f55da97bd4c201bcd3d83cc4a8480c26354d61666839e3c3964bca85 +size 4792 diff --git a/run-2787ye3h/checkpoint-1147/model.safetensors b/run-2787ye3h/checkpoint-1147/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98d320507662b2d0a99250060bd1028e247a0d60 --- /dev/null +++ b/run-2787ye3h/checkpoint-1147/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:756a7990e7702b773466531078bb3fe9016548435c2876cd65cae0c9ab2236cb +size 198025308 diff --git a/run-2787ye3h/checkpoint-1147/optimizer.pt b/run-2787ye3h/checkpoint-1147/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..443dfd8627f33796f75f5b07275b3dd1200b21c0 --- /dev/null +++ b/run-2787ye3h/checkpoint-1147/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb84fa967a9ded82440d1c6f3316c7c9fc0d7c5bfbc68c4202ac2de0ff056359 +size 395900602 diff --git a/run-2787ye3h/checkpoint-1147/rng_state.pth b/run-2787ye3h/checkpoint-1147/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..43b1a3175dffb3289ba56a1e7f78b36ca1615834 --- /dev/null +++ b/run-2787ye3h/checkpoint-1147/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf2d43d63184b1920f250efdd6f38efa027691f238090c0a0b0f43317419a2de +size 14244 diff --git a/run-2787ye3h/checkpoint-1147/scheduler.pt b/run-2787ye3h/checkpoint-1147/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7cd09f532935011c5d0be7f9e80728ea7d804bb --- /dev/null +++ b/run-2787ye3h/checkpoint-1147/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a440e59de37db302633fa345da2c0920b23c9b167bc8e7ac5218b25bb8dc0d7e +size 1064 diff --git a/run-2787ye3h/checkpoint-1147/trainer_state.json b/run-2787ye3h/checkpoint-1147/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1d332dbf7e835f0b573c23e41d01eb7193f4bd5e --- /dev/null +++ b/run-2787ye3h/checkpoint-1147/trainer_state.json @@ -0,0 +1,534 @@ +{ + "best_metric": 0.9273835920177383, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-2787ye3h/checkpoint-1147", + "epoch": 26.988235294117647, + "eval_steps": 500, + "global_step": 1147, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.626442337305279e-05, + "loss": 1.293, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8755543237250555, + "eval_loss": 0.9349600672721863, + "eval_runtime": 6.9886, + "eval_samples_per_second": 516.266, + "eval_steps_per_second": 8.156, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 7.252884674610559e-05, + "loss": 0.9064, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00010879327011915836, + "loss": 0.8316, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8049944639205933, + "eval_runtime": 6.9141, + "eval_samples_per_second": 521.829, + "eval_steps_per_second": 8.244, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00014505769349221117, + "loss": 0.7997, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8133141398429871, + "eval_runtime": 6.6693, + "eval_samples_per_second": 540.987, + "eval_steps_per_second": 8.547, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00018132211686526395, + "loss": 0.7971, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00018108536618532123, + "loss": 0.7806, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8043554425239563, + "eval_runtime": 6.9887, + "eval_samples_per_second": 516.26, + "eval_steps_per_second": 8.156, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00018037635063854584, + "loss": 0.7788, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00017919877324618395, + "loss": 0.764, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.7976019382476807, + "eval_runtime": 6.7358, + "eval_samples_per_second": 535.642, + "eval_steps_per_second": 8.462, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00017755878421766717, + "loss": 0.7658, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9016075388026608, + "eval_loss": 0.8304137587547302, + "eval_runtime": 6.8918, + "eval_samples_per_second": 523.522, + "eval_steps_per_second": 8.271, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00017546494882951695, + "loss": 0.751, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00017292820269092213, + "loss": 0.7533, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9049334811529933, + "eval_loss": 0.8278865218162537, + "eval_runtime": 6.9038, + "eval_samples_per_second": 522.607, + "eval_steps_per_second": 8.256, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00016996179462962718, + "loss": 0.747, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00016658121749642477, + "loss": 0.7294, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8298162817955017, + "eval_runtime": 6.8854, + "eval_samples_per_second": 524.008, + "eval_steps_per_second": 8.278, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0001628041272496441, + "loss": 0.7325, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8046008944511414, + "eval_runtime": 6.8095, + "eval_samples_per_second": 529.85, + "eval_steps_per_second": 8.371, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0001586502507422381, + "loss": 0.737, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0001541412826930757, + "loss": 0.7235, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8011636734008789, + "eval_runtime": 7.0572, + "eval_samples_per_second": 511.252, + "eval_steps_per_second": 8.077, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00014930077238053366, + "loss": 0.7212, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8091404438018799, + "eval_runtime": 6.6144, + "eval_samples_per_second": 545.478, + "eval_steps_per_second": 8.618, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00014415400065016071, + "loss": 0.7204, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00013872784787877452, + "loss": 0.7154, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8747228381374723, + "eval_loss": 0.8664525747299194, + "eval_runtime": 6.7378, + "eval_samples_per_second": 535.484, + "eval_steps_per_second": 8.46, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00013305065358458295, + "loss": 0.7148, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00012715206841655379, + "loss": 0.7122, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8063619136810303, + "eval_runtime": 6.798, + "eval_samples_per_second": 530.746, + "eval_steps_per_second": 8.385, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00012106289929605711, + "loss": 0.7096, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8059455752372742, + "eval_runtime": 6.7495, + "eval_samples_per_second": 534.558, + "eval_steps_per_second": 8.445, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00011481494851956916, + "loss": 0.7157, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0001084408476627666, + "loss": 0.7021, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.8017489314079285, + "eval_runtime": 6.9058, + "eval_samples_per_second": 522.457, + "eval_steps_per_second": 8.254, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0001019738871534904, + "loss": 0.7021, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 9.544784240368045e-05, + "loss": 0.7002, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.816520631313324, + "eval_runtime": 6.8, + "eval_samples_per_second": 530.585, + "eval_steps_per_second": 8.382, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 8.889679740835216e-05, + "loss": 0.705, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8022974133491516, + "eval_runtime": 6.9057, + "eval_samples_per_second": 522.468, + "eval_steps_per_second": 8.254, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 8.235496673291634e-05, + "loss": 0.7031, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 7.585651681856075e-05, + "loss": 0.6987, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8067501783370972, + "eval_runtime": 6.7889, + "eval_samples_per_second": 531.457, + "eval_steps_per_second": 8.396, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 6.943538753897311e-05, + "loss": 0.6986, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 6.312511494037267e-05, + "loss": 0.698, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9262749445676275, + "eval_loss": 0.7983365058898926, + "eval_runtime": 6.7297, + "eval_samples_per_second": 536.129, + "eval_steps_per_second": 8.47, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 5.695865609063774e-05, + "loss": 0.6899, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8008666038513184, + "eval_runtime": 6.6376, + "eval_samples_per_second": 543.571, + "eval_steps_per_second": 8.587, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 5.0968216952300726e-05, + "loss": 0.6935, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 4.518508417838943e-05, + "loss": 0.6922, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9221175166297118, + "eval_loss": 0.8021017909049988, + "eval_runtime": 6.9244, + "eval_samples_per_second": 521.053, + "eval_steps_per_second": 8.232, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 3.9639461709605394e-05, + "loss": 0.6895, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9240576496674058, + "eval_loss": 0.7964810729026794, + "eval_runtime": 6.7682, + "eval_samples_per_second": 533.077, + "eval_steps_per_second": 8.422, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 3.436031302625408e-05, + "loss": 0.6875, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 2.937520987880715e-05, + "loss": 0.6865, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9229490022172949, + "eval_loss": 0.7992194294929504, + "eval_runtime": 6.8278, + "eval_samples_per_second": 528.427, + "eval_steps_per_second": 8.348, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 2.4710188287142336e-05, + "loss": 0.6886, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 2.0389612560542293e-05, + "loss": 0.6874, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9243348115299335, + "eval_loss": 0.794614851474762, + "eval_runtime": 7.0369, + "eval_samples_per_second": 512.726, + "eval_steps_per_second": 8.1, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.6436048048644487e-05, + "loss": 0.6936, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.924889135254989, + "eval_loss": 0.7939379811286926, + "eval_runtime": 6.7889, + "eval_samples_per_second": 531.455, + "eval_steps_per_second": 8.396, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.2870143287933509e-05, + "loss": 0.6864, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 9.710522159297248e-06, + "loss": 0.6875, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.926829268292683, + "eval_loss": 0.7930501699447632, + "eval_runtime": 6.6519, + "eval_samples_per_second": 542.402, + "eval_steps_per_second": 8.569, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 6.9736866198824056e-06, + "loss": 0.6869, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 4.67393051725817e-06, + "loss": 0.6863, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9273835920177383, + "eval_loss": 0.7957404851913452, + "eval_runtime": 6.6539, + "eval_samples_per_second": 542.236, + "eval_steps_per_second": 8.566, + "step": 1147 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00018132211686526395, + "metric": "eval/loss", + "warmup_ratio": 0.1030284831764748 + } +} diff --git a/run-2787ye3h/checkpoint-1147/training_args.bin b/run-2787ye3h/checkpoint-1147/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8e3059b598b265596cd8f93e4fc95e448221c336 --- /dev/null +++ b/run-2787ye3h/checkpoint-1147/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94e2659a8bb759e9044d294378560379668c25ab4ec4cf4e84f05ca8f53c3df5 +size 4792 diff --git a/run-2787ye3h/checkpoint-1260/model.safetensors b/run-2787ye3h/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da333b3ed8d34a281d7522e44762c56ad767e45e --- /dev/null +++ b/run-2787ye3h/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b604effb42ed0baafcac2b4065eccab0998e410df650f41d83ee6e33df57430a +size 198025308 diff --git a/run-2787ye3h/checkpoint-1260/optimizer.pt b/run-2787ye3h/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8aa7af1008748c412109ce26b9063694933039c --- /dev/null +++ b/run-2787ye3h/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7923d81e79b6942020b1e84cafe77e6444202c34dc4ce3ca5698683ce6e57f8d +size 395900602 diff --git a/run-2787ye3h/checkpoint-1260/rng_state.pth b/run-2787ye3h/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-2787ye3h/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-2787ye3h/checkpoint-1260/scheduler.pt b/run-2787ye3h/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c23839cb3dd5f953b48e241cca9bf56f61f05b65 --- /dev/null +++ b/run-2787ye3h/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61b5c1cff843287d42e8d539aeaf3ec7dfa105711e1497176ca7268775886c4f +size 1064 diff --git a/run-2787ye3h/checkpoint-1260/trainer_state.json b/run-2787ye3h/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..dc697ae04c042e61e0bb6a759bca844f70d13c04 --- /dev/null +++ b/run-2787ye3h/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9273835920177383, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-2787ye3h/checkpoint-1147", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.626442337305279e-05, + "loss": 1.293, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8755543237250555, + "eval_loss": 0.9349600672721863, + "eval_runtime": 6.9886, + "eval_samples_per_second": 516.266, + "eval_steps_per_second": 8.156, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 7.252884674610559e-05, + "loss": 0.9064, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00010879327011915836, + "loss": 0.8316, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8049944639205933, + "eval_runtime": 6.9141, + "eval_samples_per_second": 521.829, + "eval_steps_per_second": 8.244, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00014505769349221117, + "loss": 0.7997, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8133141398429871, + "eval_runtime": 6.6693, + "eval_samples_per_second": 540.987, + "eval_steps_per_second": 8.547, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00018132211686526395, + "loss": 0.7971, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00018108536618532123, + "loss": 0.7806, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8043554425239563, + "eval_runtime": 6.9887, + "eval_samples_per_second": 516.26, + "eval_steps_per_second": 8.156, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00018037635063854584, + "loss": 0.7788, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00017919877324618395, + "loss": 0.764, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.7976019382476807, + "eval_runtime": 6.7358, + "eval_samples_per_second": 535.642, + "eval_steps_per_second": 8.462, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00017755878421766717, + "loss": 0.7658, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9016075388026608, + "eval_loss": 0.8304137587547302, + "eval_runtime": 6.8918, + "eval_samples_per_second": 523.522, + "eval_steps_per_second": 8.271, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00017546494882951695, + "loss": 0.751, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00017292820269092213, + "loss": 0.7533, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9049334811529933, + "eval_loss": 0.8278865218162537, + "eval_runtime": 6.9038, + "eval_samples_per_second": 522.607, + "eval_steps_per_second": 8.256, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00016996179462962718, + "loss": 0.747, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00016658121749642477, + "loss": 0.7294, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8298162817955017, + "eval_runtime": 6.8854, + "eval_samples_per_second": 524.008, + "eval_steps_per_second": 8.278, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0001628041272496441, + "loss": 0.7325, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8046008944511414, + "eval_runtime": 6.8095, + "eval_samples_per_second": 529.85, + "eval_steps_per_second": 8.371, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0001586502507422381, + "loss": 0.737, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0001541412826930757, + "loss": 0.7235, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8011636734008789, + "eval_runtime": 7.0572, + "eval_samples_per_second": 511.252, + "eval_steps_per_second": 8.077, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00014930077238053366, + "loss": 0.7212, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8091404438018799, + "eval_runtime": 6.6144, + "eval_samples_per_second": 545.478, + "eval_steps_per_second": 8.618, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00014415400065016071, + "loss": 0.7204, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00013872784787877452, + "loss": 0.7154, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8747228381374723, + "eval_loss": 0.8664525747299194, + "eval_runtime": 6.7378, + "eval_samples_per_second": 535.484, + "eval_steps_per_second": 8.46, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00013305065358458295, + "loss": 0.7148, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00012715206841655379, + "loss": 0.7122, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8063619136810303, + "eval_runtime": 6.798, + "eval_samples_per_second": 530.746, + "eval_steps_per_second": 8.385, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00012106289929605711, + "loss": 0.7096, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8059455752372742, + "eval_runtime": 6.7495, + "eval_samples_per_second": 534.558, + "eval_steps_per_second": 8.445, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00011481494851956916, + "loss": 0.7157, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0001084408476627666, + "loss": 0.7021, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.8017489314079285, + "eval_runtime": 6.9058, + "eval_samples_per_second": 522.457, + "eval_steps_per_second": 8.254, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0001019738871534904, + "loss": 0.7021, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 9.544784240368045e-05, + "loss": 0.7002, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.816520631313324, + "eval_runtime": 6.8, + "eval_samples_per_second": 530.585, + "eval_steps_per_second": 8.382, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 8.889679740835216e-05, + "loss": 0.705, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8022974133491516, + "eval_runtime": 6.9057, + "eval_samples_per_second": 522.468, + "eval_steps_per_second": 8.254, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 8.235496673291634e-05, + "loss": 0.7031, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 7.585651681856075e-05, + "loss": 0.6987, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8067501783370972, + "eval_runtime": 6.7889, + "eval_samples_per_second": 531.457, + "eval_steps_per_second": 8.396, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 6.943538753897311e-05, + "loss": 0.6986, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 6.312511494037267e-05, + "loss": 0.698, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9262749445676275, + "eval_loss": 0.7983365058898926, + "eval_runtime": 6.7297, + "eval_samples_per_second": 536.129, + "eval_steps_per_second": 8.47, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 5.695865609063774e-05, + "loss": 0.6899, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8008666038513184, + "eval_runtime": 6.6376, + "eval_samples_per_second": 543.571, + "eval_steps_per_second": 8.587, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 5.0968216952300726e-05, + "loss": 0.6935, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 4.518508417838943e-05, + "loss": 0.6922, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9221175166297118, + "eval_loss": 0.8021017909049988, + "eval_runtime": 6.9244, + "eval_samples_per_second": 521.053, + "eval_steps_per_second": 8.232, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 3.9639461709605394e-05, + "loss": 0.6895, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9240576496674058, + "eval_loss": 0.7964810729026794, + "eval_runtime": 6.7682, + "eval_samples_per_second": 533.077, + "eval_steps_per_second": 8.422, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 3.436031302625408e-05, + "loss": 0.6875, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 2.937520987880715e-05, + "loss": 0.6865, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9229490022172949, + "eval_loss": 0.7992194294929504, + "eval_runtime": 6.8278, + "eval_samples_per_second": 528.427, + "eval_steps_per_second": 8.348, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 2.4710188287142336e-05, + "loss": 0.6886, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 2.0389612560542293e-05, + "loss": 0.6874, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9243348115299335, + "eval_loss": 0.794614851474762, + "eval_runtime": 7.0369, + "eval_samples_per_second": 512.726, + "eval_steps_per_second": 8.1, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.6436048048644487e-05, + "loss": 0.6936, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.924889135254989, + "eval_loss": 0.7939379811286926, + "eval_runtime": 6.7889, + "eval_samples_per_second": 531.455, + "eval_steps_per_second": 8.396, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.2870143287933509e-05, + "loss": 0.6864, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 9.710522159297248e-06, + "loss": 0.6875, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.926829268292683, + "eval_loss": 0.7930501699447632, + "eval_runtime": 6.6519, + "eval_samples_per_second": 542.402, + "eval_steps_per_second": 8.569, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 6.9736866198824056e-06, + "loss": 0.6869, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 4.67393051725817e-06, + "loss": 0.6863, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9273835920177383, + "eval_loss": 0.7957404851913452, + "eval_runtime": 6.6539, + "eval_samples_per_second": 542.236, + "eval_steps_per_second": 8.566, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.823264936016118e-06, + "loss": 0.6844, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9237804878048781, + "eval_loss": 0.7975443005561829, + "eval_runtime": 6.6662, + "eval_samples_per_second": 541.236, + "eval_steps_per_second": 8.551, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.4313554667037497e-06, + "loss": 0.6868, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 5.054717247209873e-07, + "loss": 0.6883, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9254434589800443, + "eval_loss": 0.7970326542854309, + "eval_runtime": 6.7789, + "eval_samples_per_second": 532.24, + "eval_steps_per_second": 8.408, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 5.0449382830818864e-08, + "loss": 0.6866, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9210088691796009, + "eval_loss": 0.8019540309906006, + "eval_runtime": 6.7841, + "eval_samples_per_second": 531.828, + "eval_steps_per_second": 8.402, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00018132211686526395, + "metric": "eval/loss", + "warmup_ratio": 0.1030284831764748 + } +} diff --git a/run-2787ye3h/checkpoint-1260/training_args.bin b/run-2787ye3h/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8e3059b598b265596cd8f93e4fc95e448221c336 --- /dev/null +++ b/run-2787ye3h/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94e2659a8bb759e9044d294378560379668c25ab4ec4cf4e84f05ca8f53c3df5 +size 4792 diff --git a/run-29h0ichm/checkpoint-1232/model.safetensors b/run-29h0ichm/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abb1f5c9d42f240ed616942def5fd17ce52016ed --- /dev/null +++ b/run-29h0ichm/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db5f61d12178aef4ec9a1782f92b84ca6d8a9ec28e8f6601801840b106bc002c +size 198025308 diff --git a/run-29h0ichm/checkpoint-1232/optimizer.pt b/run-29h0ichm/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..339654f29a9a6c4d15a65401f150ba08df865b70 --- /dev/null +++ b/run-29h0ichm/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6f140d657b6c4282f0a6d334e25d41399f5bb6bf1c406e335dd03c5aa96c375 +size 395900602 diff --git a/run-29h0ichm/checkpoint-1232/rng_state.pth b/run-29h0ichm/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-29h0ichm/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-29h0ichm/checkpoint-1232/scheduler.pt b/run-29h0ichm/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bdbeec6751435c27590795fa04fb24ae0ccab30f --- /dev/null +++ b/run-29h0ichm/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b17d1f4b619b26ffdb52b3d414ce4a6ca0e5dde0237dda6caaedf7baa38e186 +size 1064 diff --git a/run-29h0ichm/checkpoint-1232/trainer_state.json b/run-29h0ichm/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ea2231052d506b4d75dd2f9ff0a1fc587b6ef75d --- /dev/null +++ b/run-29h0ichm/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.924889135254989, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-29h0ichm/checkpoint-1190", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.3002801275853926e-05, + "loss": 1.4288, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_loss": 0.9695671796798706, + "eval_runtime": 7.0484, + "eval_samples_per_second": 511.888, + "eval_steps_per_second": 8.087, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 2.6005602551707852e-05, + "loss": 1.0169, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 3.900840382756177e-05, + "loss": 0.8905, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8988359201773836, + "eval_loss": 0.8517076969146729, + "eval_runtime": 6.945, + "eval_samples_per_second": 519.507, + "eval_steps_per_second": 8.207, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 5.2011205103415704e-05, + "loss": 0.8329, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9093680709534369, + "eval_loss": 0.8142409324645996, + "eval_runtime": 7.0042, + "eval_samples_per_second": 515.123, + "eval_steps_per_second": 8.138, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 6.501400637926963e-05, + "loss": 0.8067, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 7.801680765512355e-05, + "loss": 0.7946, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8081430792808533, + "eval_runtime": 6.8448, + "eval_samples_per_second": 527.116, + "eval_steps_per_second": 8.327, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 9.101960893097748e-05, + "loss": 0.7835, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010402241020683141, + "loss": 0.7776, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8002059459686279, + "eval_runtime": 6.7746, + "eval_samples_per_second": 532.575, + "eval_steps_per_second": 8.414, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011702521148268534, + "loss": 0.7733, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9113082039911308, + "eval_loss": 0.8091906309127808, + "eval_runtime": 6.7326, + "eval_samples_per_second": 535.898, + "eval_steps_per_second": 8.466, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00013002801275853926, + "loss": 0.769, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001430308140343932, + "loss": 0.7606, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8018920421600342, + "eval_runtime": 6.7549, + "eval_samples_per_second": 534.135, + "eval_steps_per_second": 8.438, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00014277948445408833, + "loss": 0.7627, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00014202726222924556, + "loss": 0.7504, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.7977239489555359, + "eval_runtime": 6.9911, + "eval_samples_per_second": 516.087, + "eval_steps_per_second": 8.153, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00014077943449179769, + "loss": 0.7386, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.7860310421286031, + "eval_loss": 1.0136197805404663, + "eval_runtime": 6.7992, + "eval_samples_per_second": 530.649, + "eval_steps_per_second": 8.383, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00013904477182796176, + "loss": 0.7485, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00013683546663256446, + "loss": 0.7377, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8229023814201355, + "eval_runtime": 6.8204, + "eval_samples_per_second": 528.999, + "eval_steps_per_second": 8.357, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0001341670474125579, + "loss": 0.7304, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.7997891306877136, + "eval_runtime": 6.9681, + "eval_samples_per_second": 517.789, + "eval_steps_per_second": 8.18, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00013105826964205915, + "loss": 0.7257, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00012753098393605898, + "loss": 0.7229, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8081395626068115, + "eval_runtime": 6.8807, + "eval_samples_per_second": 524.363, + "eval_steps_per_second": 8.284, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.000123609982469365, + "loss": 0.7229, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00011932282472025194, + "loss": 0.7213, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8033608198165894, + "eval_runtime": 6.492, + "eval_samples_per_second": 555.759, + "eval_steps_per_second": 8.78, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00011469964376361164, + "loss": 0.7199, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.8193542957305908, + "eval_runtime": 6.5648, + "eval_samples_per_second": 549.598, + "eval_steps_per_second": 8.683, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00010977293447510712, + "loss": 0.72, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00010457732513497633, + "loss": 0.7083, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8841463414634146, + "eval_loss": 0.8560231328010559, + "eval_runtime": 7.0946, + "eval_samples_per_second": 508.554, + "eval_steps_per_second": 8.034, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 9.914933403681078e-05, + "loss": 0.7122, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 9.352711281202806e-05, + "loss": 0.7098, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8119896650314331, + "eval_runtime": 7.0574, + "eval_samples_per_second": 511.234, + "eval_steps_per_second": 8.077, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 8.775017827413008e-05, + "loss": 0.7091, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8150137662887573, + "eval_runtime": 6.6809, + "eval_samples_per_second": 540.044, + "eval_steps_per_second": 8.532, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 8.185913466752837e-05, + "loss": 0.7011, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 7.589538827316175e-05, + "loss": 0.7042, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8104116916656494, + "eval_runtime": 6.7801, + "eval_samples_per_second": 532.147, + "eval_steps_per_second": 8.407, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 6.990085637685318e-05, + "loss": 0.7005, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 6.391767264597476e-05, + "loss": 0.7007, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.8980044345898004, + "eval_loss": 0.8331359028816223, + "eval_runtime": 6.6744, + "eval_samples_per_second": 540.575, + "eval_steps_per_second": 8.54, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 5.798789098523564e-05, + "loss": 0.7058, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8079680800437927, + "eval_runtime": 6.7387, + "eval_samples_per_second": 535.412, + "eval_steps_per_second": 8.459, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 5.215318995309626e-05, + "loss": 0.6963, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 4.6454579816372486e-05, + "loss": 0.6957, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.810405969619751, + "eval_runtime": 6.9683, + "eval_samples_per_second": 517.773, + "eval_steps_per_second": 8.18, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 4.093211430205049e-05, + "loss": 0.6931, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.808899462223053, + "eval_runtime": 6.8846, + "eval_samples_per_second": 524.07, + "eval_steps_per_second": 8.279, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 3.5624609072316884e-05, + "loss": 0.695, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 3.05693689015551e-05, + "loss": 0.693, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9223946784922394, + "eval_loss": 0.8023791909217834, + "eval_runtime": 6.7518, + "eval_samples_per_second": 534.373, + "eval_steps_per_second": 8.442, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 2.5801925472893242e-05, + "loss": 0.6939, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 2.1355787637249674e-05, + "loss": 0.6934, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.8015068769454956, + "eval_runtime": 6.7805, + "eval_samples_per_second": 532.113, + "eval_steps_per_second": 8.406, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.7262205890225772e-05, + "loss": 0.6911, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8050680756568909, + "eval_runtime": 6.6202, + "eval_samples_per_second": 544.999, + "eval_steps_per_second": 8.61, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.3549952722263349e-05, + "loss": 0.6896, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 1.0245120385916187e-05, + "loss": 0.6879, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8068881034851074, + "eval_runtime": 6.6458, + "eval_samples_per_second": 542.9, + "eval_steps_per_second": 8.577, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 7.370937501665227e-06, + "loss": 0.6903, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 4.9476057912973655e-06, + "loss": 0.6932, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9218403547671841, + "eval_loss": 0.8008785247802734, + "eval_runtime": 6.742, + "eval_samples_per_second": 535.156, + "eval_steps_per_second": 8.455, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.9921580863969824e-06, + "loss": 0.6912, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.924889135254989, + "eval_loss": 0.7968100309371948, + "eval_runtime": 6.7991, + "eval_samples_per_second": 530.661, + "eval_steps_per_second": 8.383, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.5183386099637947e-06, + "loss": 0.6861, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 5.365063726196294e-07, + "loss": 0.6877, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8051350712776184, + "eval_runtime": 6.5909, + "eval_samples_per_second": 547.421, + "eval_steps_per_second": 8.648, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0001430308140343932, + "metric": "eval/loss", + "warmup_ratio": 0.22639169356076547 + } +} diff --git a/run-29h0ichm/checkpoint-1232/training_args.bin b/run-29h0ichm/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..44fd806571fab0fcfc66673efcf3a3a3b30790a8 --- /dev/null +++ b/run-29h0ichm/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0693baa920a9bda1a4c27253ea57f3d05a8f31b953077ab5e810badcb810c98b +size 4792 diff --git a/run-29h0ichm/checkpoint-1260/model.safetensors b/run-29h0ichm/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e1d5b7f3aa41141192d16c53bf8951e011b0fda --- /dev/null +++ b/run-29h0ichm/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6158651377daf6c5e0d6364f77841be941b68097674314d91b4f1c8f9215631f +size 198025308 diff --git a/run-29h0ichm/checkpoint-1260/optimizer.pt b/run-29h0ichm/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3cdc057241b774777e6bd1eaf97f1138cdd5c40 --- /dev/null +++ b/run-29h0ichm/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8665691df2290d9f037c78567a88f70cb04cc5b78b74b346723c69a84fa43e87 +size 395900602 diff --git a/run-29h0ichm/checkpoint-1260/rng_state.pth b/run-29h0ichm/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-29h0ichm/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-29h0ichm/checkpoint-1260/scheduler.pt b/run-29h0ichm/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dec385cc3a521e90f299fece5633e910d95f604a --- /dev/null +++ b/run-29h0ichm/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76cb7c58830aba3a986dbdaa4db29fc7b8b474709f29b3cc03dab1a8ce031e07 +size 1064 diff --git a/run-29h0ichm/checkpoint-1260/trainer_state.json b/run-29h0ichm/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fee07c8e6f3cd23c9ac9ece52c02f14600af68f6 --- /dev/null +++ b/run-29h0ichm/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9251662971175166, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-29h0ichm/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.3002801275853926e-05, + "loss": 1.4288, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_loss": 0.9695671796798706, + "eval_runtime": 7.0484, + "eval_samples_per_second": 511.888, + "eval_steps_per_second": 8.087, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 2.6005602551707852e-05, + "loss": 1.0169, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 3.900840382756177e-05, + "loss": 0.8905, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8988359201773836, + "eval_loss": 0.8517076969146729, + "eval_runtime": 6.945, + "eval_samples_per_second": 519.507, + "eval_steps_per_second": 8.207, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 5.2011205103415704e-05, + "loss": 0.8329, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9093680709534369, + "eval_loss": 0.8142409324645996, + "eval_runtime": 7.0042, + "eval_samples_per_second": 515.123, + "eval_steps_per_second": 8.138, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 6.501400637926963e-05, + "loss": 0.8067, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 7.801680765512355e-05, + "loss": 0.7946, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8081430792808533, + "eval_runtime": 6.8448, + "eval_samples_per_second": 527.116, + "eval_steps_per_second": 8.327, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 9.101960893097748e-05, + "loss": 0.7835, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010402241020683141, + "loss": 0.7776, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8002059459686279, + "eval_runtime": 6.7746, + "eval_samples_per_second": 532.575, + "eval_steps_per_second": 8.414, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011702521148268534, + "loss": 0.7733, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9113082039911308, + "eval_loss": 0.8091906309127808, + "eval_runtime": 6.7326, + "eval_samples_per_second": 535.898, + "eval_steps_per_second": 8.466, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00013002801275853926, + "loss": 0.769, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001430308140343932, + "loss": 0.7606, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8018920421600342, + "eval_runtime": 6.7549, + "eval_samples_per_second": 534.135, + "eval_steps_per_second": 8.438, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00014277948445408833, + "loss": 0.7627, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00014202726222924556, + "loss": 0.7504, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.7977239489555359, + "eval_runtime": 6.9911, + "eval_samples_per_second": 516.087, + "eval_steps_per_second": 8.153, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00014077943449179769, + "loss": 0.7386, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.7860310421286031, + "eval_loss": 1.0136197805404663, + "eval_runtime": 6.7992, + "eval_samples_per_second": 530.649, + "eval_steps_per_second": 8.383, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00013904477182796176, + "loss": 0.7485, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00013683546663256446, + "loss": 0.7377, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8229023814201355, + "eval_runtime": 6.8204, + "eval_samples_per_second": 528.999, + "eval_steps_per_second": 8.357, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0001341670474125579, + "loss": 0.7304, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.7997891306877136, + "eval_runtime": 6.9681, + "eval_samples_per_second": 517.789, + "eval_steps_per_second": 8.18, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00013105826964205915, + "loss": 0.7257, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00012753098393605898, + "loss": 0.7229, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8081395626068115, + "eval_runtime": 6.8807, + "eval_samples_per_second": 524.363, + "eval_steps_per_second": 8.284, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.000123609982469365, + "loss": 0.7229, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00011932282472025194, + "loss": 0.7213, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8033608198165894, + "eval_runtime": 6.492, + "eval_samples_per_second": 555.759, + "eval_steps_per_second": 8.78, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00011469964376361164, + "loss": 0.7199, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.8193542957305908, + "eval_runtime": 6.5648, + "eval_samples_per_second": 549.598, + "eval_steps_per_second": 8.683, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00010977293447510712, + "loss": 0.72, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00010457732513497633, + "loss": 0.7083, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8841463414634146, + "eval_loss": 0.8560231328010559, + "eval_runtime": 7.0946, + "eval_samples_per_second": 508.554, + "eval_steps_per_second": 8.034, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 9.914933403681078e-05, + "loss": 0.7122, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 9.352711281202806e-05, + "loss": 0.7098, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8119896650314331, + "eval_runtime": 7.0574, + "eval_samples_per_second": 511.234, + "eval_steps_per_second": 8.077, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 8.775017827413008e-05, + "loss": 0.7091, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8150137662887573, + "eval_runtime": 6.6809, + "eval_samples_per_second": 540.044, + "eval_steps_per_second": 8.532, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 8.185913466752837e-05, + "loss": 0.7011, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 7.589538827316175e-05, + "loss": 0.7042, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8104116916656494, + "eval_runtime": 6.7801, + "eval_samples_per_second": 532.147, + "eval_steps_per_second": 8.407, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 6.990085637685318e-05, + "loss": 0.7005, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 6.391767264597476e-05, + "loss": 0.7007, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.8980044345898004, + "eval_loss": 0.8331359028816223, + "eval_runtime": 6.6744, + "eval_samples_per_second": 540.575, + "eval_steps_per_second": 8.54, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 5.798789098523564e-05, + "loss": 0.7058, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8079680800437927, + "eval_runtime": 6.7387, + "eval_samples_per_second": 535.412, + "eval_steps_per_second": 8.459, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 5.215318995309626e-05, + "loss": 0.6963, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 4.6454579816372486e-05, + "loss": 0.6957, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.810405969619751, + "eval_runtime": 6.9683, + "eval_samples_per_second": 517.773, + "eval_steps_per_second": 8.18, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 4.093211430205049e-05, + "loss": 0.6931, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.808899462223053, + "eval_runtime": 6.8846, + "eval_samples_per_second": 524.07, + "eval_steps_per_second": 8.279, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 3.5624609072316884e-05, + "loss": 0.695, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 3.05693689015551e-05, + "loss": 0.693, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9223946784922394, + "eval_loss": 0.8023791909217834, + "eval_runtime": 6.7518, + "eval_samples_per_second": 534.373, + "eval_steps_per_second": 8.442, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 2.5801925472893242e-05, + "loss": 0.6939, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 2.1355787637249674e-05, + "loss": 0.6934, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.8015068769454956, + "eval_runtime": 6.7805, + "eval_samples_per_second": 532.113, + "eval_steps_per_second": 8.406, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.7262205890225772e-05, + "loss": 0.6911, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8050680756568909, + "eval_runtime": 6.6202, + "eval_samples_per_second": 544.999, + "eval_steps_per_second": 8.61, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.3549952722263349e-05, + "loss": 0.6896, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 1.0245120385916187e-05, + "loss": 0.6879, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8068881034851074, + "eval_runtime": 6.6458, + "eval_samples_per_second": 542.9, + "eval_steps_per_second": 8.577, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 7.370937501665227e-06, + "loss": 0.6903, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 4.9476057912973655e-06, + "loss": 0.6932, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9218403547671841, + "eval_loss": 0.8008785247802734, + "eval_runtime": 6.742, + "eval_samples_per_second": 535.156, + "eval_steps_per_second": 8.455, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.9921580863969824e-06, + "loss": 0.6912, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.924889135254989, + "eval_loss": 0.7968100309371948, + "eval_runtime": 6.7991, + "eval_samples_per_second": 530.661, + "eval_steps_per_second": 8.383, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.5183386099637947e-06, + "loss": 0.6861, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 5.365063726196294e-07, + "loss": 0.6877, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8051350712776184, + "eval_runtime": 6.5909, + "eval_samples_per_second": 547.421, + "eval_steps_per_second": 8.648, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 5.356236240142569e-08, + "loss": 0.6894, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9251662971175166, + "eval_loss": 0.7958797216415405, + "eval_runtime": 6.9948, + "eval_samples_per_second": 515.811, + "eval_steps_per_second": 8.149, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0001430308140343932, + "metric": "eval/loss", + "warmup_ratio": 0.22639169356076547 + } +} diff --git a/run-29h0ichm/checkpoint-1260/training_args.bin b/run-29h0ichm/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..44fd806571fab0fcfc66673efcf3a3a3b30790a8 --- /dev/null +++ b/run-29h0ichm/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0693baa920a9bda1a4c27253ea57f3d05a8f31b953077ab5e810badcb810c98b +size 4792 diff --git a/run-2ec3ndpb/checkpoint-595/model.safetensors b/run-2ec3ndpb/checkpoint-595/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1a2cfdc66a057a4ac1ecef0e45407aa380fdd6a --- /dev/null +++ b/run-2ec3ndpb/checkpoint-595/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3422b7650c5fae12ef43cb0ab6664f91d5ab71eaae240fa6fefacc139458bfa0 +size 198025308 diff --git a/run-2ec3ndpb/checkpoint-595/optimizer.pt b/run-2ec3ndpb/checkpoint-595/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b4f45db2b7da5960889b65f168efeeceb4e44da --- /dev/null +++ b/run-2ec3ndpb/checkpoint-595/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87c2ed9351821088939733495dc53b95c287d8dcf240f2b510583dbe7e348f37 +size 395900602 diff --git a/run-2ec3ndpb/checkpoint-595/rng_state.pth b/run-2ec3ndpb/checkpoint-595/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b2798d3ef22ba33b35deea6a8c61abbb56099a6 --- /dev/null +++ b/run-2ec3ndpb/checkpoint-595/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d4b46d5b7fd917d05ccb48b8b2f6f0c7b9f5cfd5e53675d2f6391274fc4f7a5 +size 14244 diff --git a/run-2ec3ndpb/checkpoint-595/scheduler.pt b/run-2ec3ndpb/checkpoint-595/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d6fc09c13d2eadc6751671cf8fd18c6e85a96f2c --- /dev/null +++ b/run-2ec3ndpb/checkpoint-595/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a34ba377bc17d5e112a7e0c0e268e0e0037e102c042dea0c6a403967bde87c0 +size 1064 diff --git a/run-2ec3ndpb/checkpoint-595/trainer_state.json b/run-2ec3ndpb/checkpoint-595/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ddc1ea4f0db199083f950d57734c83e14c9be41f --- /dev/null +++ b/run-2ec3ndpb/checkpoint-595/trainer_state.json @@ -0,0 +1,634 @@ +{ + "best_metric": 0.9176453274356926, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-2ec3ndpb/checkpoint-595", + "epoch": 28.0, + "eval_steps": 500, + "global_step": 595, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.936764901496956e-06, + "loss": 1.5475, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.5715077605321508, + "eval_f1": 0.6254188737059849, + "eval_loss": 1.4664075374603271, + "eval_precision": 0.7013086313668033, + "eval_recall": 0.5715077605321508, + "eval_runtime": 7.8639, + "eval_samples_per_second": 458.806, + "eval_steps_per_second": 3.688, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.873529802993912e-06, + "loss": 1.4865, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 5.810294704490867e-06, + "loss": 1.3592, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8226164079822617, + "eval_f1": 0.7522993425476635, + "eval_loss": 1.223617672920227, + "eval_precision": 0.7162776510562452, + "eval_recall": 0.8226164079822617, + "eval_runtime": 8.175, + "eval_samples_per_second": 441.348, + "eval_steps_per_second": 3.547, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 7.747059605987823e-06, + "loss": 1.1868, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9856269359588623, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.9211, + "eval_samples_per_second": 455.492, + "eval_steps_per_second": 3.661, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 9.683824507484779e-06, + "loss": 1.0295, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 1.1620589408981734e-05, + "loss": 0.9561, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8580931263858093, + "eval_f1": 0.8098908709839147, + "eval_loss": 0.9289180040359497, + "eval_precision": 0.8592303189674444, + "eval_recall": 0.8580931263858093, + "eval_runtime": 7.907, + "eval_samples_per_second": 456.302, + "eval_steps_per_second": 3.668, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 1.355735431047869e-05, + "loss": 0.9259, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 1.4143401600798801e-05, + "loss": 0.8793, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8819290465631929, + "eval_f1": 0.85655335192574, + "eval_loss": 0.8785842657089233, + "eval_precision": 0.8712807141815823, + "eval_recall": 0.8819290465631929, + "eval_runtime": 7.9454, + "eval_samples_per_second": 454.098, + "eval_steps_per_second": 3.65, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 1.409431204120358e-05, + "loss": 0.8666, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.893569844789357, + "eval_f1": 0.8777973973396866, + "eval_loss": 0.8540728092193604, + "eval_precision": 0.8868719145172282, + "eval_recall": 0.893569844789357, + "eval_runtime": 8.1546, + "eval_samples_per_second": 442.45, + "eval_steps_per_second": 3.556, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 1.4004347179040315e-05, + "loss": 0.8516, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 1.3874031025898125e-05, + "loss": 0.8363, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.899390243902439, + "eval_f1": 0.888537719172523, + "eval_loss": 0.8393830060958862, + "eval_precision": 0.8872498218718483, + "eval_recall": 0.899390243902439, + "eval_runtime": 7.7776, + "eval_samples_per_second": 463.899, + "eval_steps_per_second": 3.729, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 1.3704122624506069e-05, + "loss": 0.8301, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 1.3495611627593855e-05, + "loss": 0.8122, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.8989549173997827, + "eval_loss": 0.8307921290397644, + "eval_precision": 0.8960018712418991, + "eval_recall": 0.9068736141906873, + "eval_runtime": 7.7498, + "eval_samples_per_second": 465.561, + "eval_steps_per_second": 3.742, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 1.3249712533535771e-05, + "loss": 0.8062, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9057649667405765, + "eval_f1": 0.8981091230267847, + "eval_loss": 0.8160279393196106, + "eval_precision": 0.894901759345189, + "eval_recall": 0.9057649667405765, + "eval_runtime": 7.939, + "eval_samples_per_second": 454.466, + "eval_steps_per_second": 3.653, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 1.2967857612353044e-05, + "loss": 0.8111, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 1.2651688563277996e-05, + "loss": 0.8036, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.8999338622720794, + "eval_loss": 0.8108046650886536, + "eval_precision": 0.8958813932128812, + "eval_recall": 0.9065964523281597, + "eval_runtime": 8.1818, + "eval_samples_per_second": 440.976, + "eval_steps_per_second": 3.544, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 1.2303046952471591e-05, + "loss": 0.7929, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.9047620292016882, + "eval_loss": 0.8064945936203003, + "eval_precision": 0.9016383270952992, + "eval_recall": 0.9107538802660754, + "eval_runtime": 7.715, + "eval_samples_per_second": 467.659, + "eval_steps_per_second": 3.759, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 1.192396348659105e-05, + "loss": 0.8057, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 1.1516646184685007e-05, + "loss": 0.7928, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.905079165616713, + "eval_loss": 0.8043007254600525, + "eval_precision": 0.9018080346187369, + "eval_recall": 0.9110310421286031, + "eval_runtime": 8.1802, + "eval_samples_per_second": 441.067, + "eval_steps_per_second": 3.545, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 1.1083467517310466e-05, + "loss": 0.7853, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 1.0626950587781368e-05, + "loss": 0.7879, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9096452328159645, + "eval_f1": 0.9050578744319696, + "eval_loss": 0.8095937371253967, + "eval_precision": 0.9016838815518865, + "eval_recall": 0.9096452328159645, + "eval_runtime": 7.7337, + "eval_samples_per_second": 466.533, + "eval_steps_per_second": 3.75, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 1.0149754436037851e-05, + "loss": 0.7829, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9049817144353016, + "eval_loss": 0.8055588006973267, + "eval_precision": 0.9016293836504138, + "eval_recall": 0.9118625277161863, + "eval_runtime": 7.3922, + "eval_samples_per_second": 488.085, + "eval_steps_per_second": 3.923, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 9.654658550735641e-06, + "loss": 0.7843, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 9.144546679766793e-06, + "loss": 0.7863, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9039107732653265, + "eval_loss": 0.8022986650466919, + "eval_precision": 0.8998813619061209, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.7639, + "eval_samples_per_second": 464.714, + "eval_steps_per_second": 3.735, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 8.622390033509516e-06, + "loss": 0.7797, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 8.09122997864181e-06, + "loss": 0.7838, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9073506205504057, + "eval_loss": 0.7976791262626648, + "eval_precision": 0.9035099764092612, + "eval_recall": 0.914079822616408, + "eval_runtime": 7.6599, + "eval_samples_per_second": 471.023, + "eval_steps_per_second": 3.786, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 7.554160323320924e-06, + "loss": 0.777, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9083766271376222, + "eval_loss": 0.7962745428085327, + "eval_precision": 0.9046016901483156, + "eval_recall": 0.914079822616408, + "eval_runtime": 8.0528, + "eval_samples_per_second": 448.043, + "eval_steps_per_second": 3.601, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 7.014309296910907e-06, + "loss": 0.7813, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 6.474821329219502e-06, + "loss": 0.7748, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9085073379106339, + "eval_loss": 0.7997791171073914, + "eval_precision": 0.9048166033865582, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.6819, + "eval_samples_per_second": 469.674, + "eval_steps_per_second": 3.775, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 5.938838735373549e-06, + "loss": 0.7762, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 5.409483413011605e-06, + "loss": 0.7715, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9090867419321762, + "eval_loss": 0.7982298731803894, + "eval_precision": 0.9063582520937362, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.4392, + "eval_samples_per_second": 484.996, + "eval_steps_per_second": 3.898, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 4.889838658400776e-06, + "loss": 0.7715, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9115767820884764, + "eval_loss": 0.7987366318702698, + "eval_precision": 0.9076263627155844, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.8031, + "eval_samples_per_second": 462.379, + "eval_steps_per_second": 3.716, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.382931207392063e-06, + "loss": 0.7734, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.891713605818937e-06, + "loss": 0.773, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9137769642701411, + "eval_loss": 0.7950804829597473, + "eval_precision": 0.9104569277385008, + "eval_recall": 0.9190687361419069, + "eval_runtime": 8.1137, + "eval_samples_per_second": 444.679, + "eval_steps_per_second": 3.574, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.419047012024923e-06, + "loss": 0.7738, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9089176770179663, + "eval_loss": 0.8004584908485413, + "eval_precision": 0.9050447873720642, + "eval_recall": 0.9151884700665188, + "eval_runtime": 7.9567, + "eval_samples_per_second": 453.453, + "eval_steps_per_second": 3.645, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.967684531689074e-06, + "loss": 0.7694, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.5402551820176813e-06, + "loss": 0.7694, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9105315229815595, + "eval_loss": 0.7953075766563416, + "eval_precision": 0.9073112586511314, + "eval_recall": 0.9154656319290465, + "eval_runtime": 8.0218, + "eval_samples_per_second": 449.772, + "eval_steps_per_second": 3.615, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.139248578704814e-06, + "loss": 0.7676, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.767000434854411e-06, + "loss": 0.77, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9105919450119896, + "eval_loss": 0.7957642078399658, + "eval_precision": 0.9071463070942072, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.8504, + "eval_samples_per_second": 459.596, + "eval_steps_per_second": 3.694, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.4256789563272373e-06, + "loss": 0.768, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.91309593972845, + "eval_loss": 0.7954826951026917, + "eval_precision": 0.9095033411774633, + "eval_recall": 0.9187915742793792, + "eval_runtime": 8.0703, + "eval_samples_per_second": 447.071, + "eval_steps_per_second": 3.593, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.1172722127547217e-06, + "loss": 0.7654, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 8.43576557778811e-07, + "loss": 0.7706, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9129859450767841, + "eval_loss": 0.7928676605224609, + "eval_precision": 0.9093761209256284, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.7916, + "eval_samples_per_second": 463.062, + "eval_steps_per_second": 3.722, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 6.061861659655526e-07, + "loss": 0.772, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 4.0648374733599056e-07, + "loss": 0.7618, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9119842470491915, + "eval_loss": 0.7993268370628357, + "eval_precision": 0.9082416074735461, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.8608, + "eval_samples_per_second": 458.989, + "eval_steps_per_second": 3.689, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.456324935987598e-07, + "loss": 0.7658, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9229490022172949, + "eval_f1": 0.9176453274356926, + "eval_loss": 0.7931926846504211, + "eval_precision": 0.9137916760285146, + "eval_recall": 0.9229490022172949, + "eval_runtime": 7.6119, + "eval_samples_per_second": 473.997, + "eval_steps_per_second": 3.81, + "step": 595 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4089097060736896, + "learning_rate": 1.4153281972477753e-05, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-2ec3ndpb/checkpoint-595/training_args.bin b/run-2ec3ndpb/checkpoint-595/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..30c665f046f9939bc0b060de8a567455d0846e82 --- /dev/null +++ b/run-2ec3ndpb/checkpoint-595/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b63b18769431b6cafd0d6a30d20a7a96abbca92cc17828e90ecd455ed2e4804 +size 4792 diff --git a/run-2ec3ndpb/checkpoint-630/model.safetensors b/run-2ec3ndpb/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b7d2b735e2f6914b1e14c392304d0ade1ef04fd0 --- /dev/null +++ b/run-2ec3ndpb/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:452fae5b73d38e770bdb1932bb71270fd725e79c840978da193990c2c766c3a2 +size 198025308 diff --git a/run-2ec3ndpb/checkpoint-630/optimizer.pt b/run-2ec3ndpb/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba531a302d2bc58a6a42de1708891f6d47dda854 --- /dev/null +++ b/run-2ec3ndpb/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dba820a526a8e9109c5074c4f6f03746eb94064aa0e2024d495cabb82e188e6 +size 395900602 diff --git a/run-2ec3ndpb/checkpoint-630/rng_state.pth b/run-2ec3ndpb/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-2ec3ndpb/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-2ec3ndpb/checkpoint-630/scheduler.pt b/run-2ec3ndpb/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1939b772723aac65b7c7268be010aa403c4e8571 --- /dev/null +++ b/run-2ec3ndpb/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3a8a3be720021a3787348cfda82f79c9bb915df8d4978e2e948ef0b5e8c0bfe +size 1064 diff --git a/run-2ec3ndpb/checkpoint-630/trainer_state.json b/run-2ec3ndpb/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7d8fd77351a60324984c6f56aaf1dee67dd84055 --- /dev/null +++ b/run-2ec3ndpb/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9176453274356926, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-2ec3ndpb/checkpoint-595", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.936764901496956e-06, + "loss": 1.5475, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.5715077605321508, + "eval_f1": 0.6254188737059849, + "eval_loss": 1.4664075374603271, + "eval_precision": 0.7013086313668033, + "eval_recall": 0.5715077605321508, + "eval_runtime": 7.8639, + "eval_samples_per_second": 458.806, + "eval_steps_per_second": 3.688, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.873529802993912e-06, + "loss": 1.4865, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 5.810294704490867e-06, + "loss": 1.3592, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8226164079822617, + "eval_f1": 0.7522993425476635, + "eval_loss": 1.223617672920227, + "eval_precision": 0.7162776510562452, + "eval_recall": 0.8226164079822617, + "eval_runtime": 8.175, + "eval_samples_per_second": 441.348, + "eval_steps_per_second": 3.547, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 7.747059605987823e-06, + "loss": 1.1868, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9856269359588623, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.9211, + "eval_samples_per_second": 455.492, + "eval_steps_per_second": 3.661, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 9.683824507484779e-06, + "loss": 1.0295, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 1.1620589408981734e-05, + "loss": 0.9561, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8580931263858093, + "eval_f1": 0.8098908709839147, + "eval_loss": 0.9289180040359497, + "eval_precision": 0.8592303189674444, + "eval_recall": 0.8580931263858093, + "eval_runtime": 7.907, + "eval_samples_per_second": 456.302, + "eval_steps_per_second": 3.668, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 1.355735431047869e-05, + "loss": 0.9259, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 1.4143401600798801e-05, + "loss": 0.8793, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8819290465631929, + "eval_f1": 0.85655335192574, + "eval_loss": 0.8785842657089233, + "eval_precision": 0.8712807141815823, + "eval_recall": 0.8819290465631929, + "eval_runtime": 7.9454, + "eval_samples_per_second": 454.098, + "eval_steps_per_second": 3.65, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 1.409431204120358e-05, + "loss": 0.8666, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.893569844789357, + "eval_f1": 0.8777973973396866, + "eval_loss": 0.8540728092193604, + "eval_precision": 0.8868719145172282, + "eval_recall": 0.893569844789357, + "eval_runtime": 8.1546, + "eval_samples_per_second": 442.45, + "eval_steps_per_second": 3.556, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 1.4004347179040315e-05, + "loss": 0.8516, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 1.3874031025898125e-05, + "loss": 0.8363, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.899390243902439, + "eval_f1": 0.888537719172523, + "eval_loss": 0.8393830060958862, + "eval_precision": 0.8872498218718483, + "eval_recall": 0.899390243902439, + "eval_runtime": 7.7776, + "eval_samples_per_second": 463.899, + "eval_steps_per_second": 3.729, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 1.3704122624506069e-05, + "loss": 0.8301, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 1.3495611627593855e-05, + "loss": 0.8122, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.8989549173997827, + "eval_loss": 0.8307921290397644, + "eval_precision": 0.8960018712418991, + "eval_recall": 0.9068736141906873, + "eval_runtime": 7.7498, + "eval_samples_per_second": 465.561, + "eval_steps_per_second": 3.742, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 1.3249712533535771e-05, + "loss": 0.8062, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9057649667405765, + "eval_f1": 0.8981091230267847, + "eval_loss": 0.8160279393196106, + "eval_precision": 0.894901759345189, + "eval_recall": 0.9057649667405765, + "eval_runtime": 7.939, + "eval_samples_per_second": 454.466, + "eval_steps_per_second": 3.653, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 1.2967857612353044e-05, + "loss": 0.8111, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 1.2651688563277996e-05, + "loss": 0.8036, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.8999338622720794, + "eval_loss": 0.8108046650886536, + "eval_precision": 0.8958813932128812, + "eval_recall": 0.9065964523281597, + "eval_runtime": 8.1818, + "eval_samples_per_second": 440.976, + "eval_steps_per_second": 3.544, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 1.2303046952471591e-05, + "loss": 0.7929, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.9047620292016882, + "eval_loss": 0.8064945936203003, + "eval_precision": 0.9016383270952992, + "eval_recall": 0.9107538802660754, + "eval_runtime": 7.715, + "eval_samples_per_second": 467.659, + "eval_steps_per_second": 3.759, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 1.192396348659105e-05, + "loss": 0.8057, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 1.1516646184685007e-05, + "loss": 0.7928, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.905079165616713, + "eval_loss": 0.8043007254600525, + "eval_precision": 0.9018080346187369, + "eval_recall": 0.9110310421286031, + "eval_runtime": 8.1802, + "eval_samples_per_second": 441.067, + "eval_steps_per_second": 3.545, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 1.1083467517310466e-05, + "loss": 0.7853, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 1.0626950587781368e-05, + "loss": 0.7879, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9096452328159645, + "eval_f1": 0.9050578744319696, + "eval_loss": 0.8095937371253967, + "eval_precision": 0.9016838815518865, + "eval_recall": 0.9096452328159645, + "eval_runtime": 7.7337, + "eval_samples_per_second": 466.533, + "eval_steps_per_second": 3.75, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 1.0149754436037851e-05, + "loss": 0.7829, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9049817144353016, + "eval_loss": 0.8055588006973267, + "eval_precision": 0.9016293836504138, + "eval_recall": 0.9118625277161863, + "eval_runtime": 7.3922, + "eval_samples_per_second": 488.085, + "eval_steps_per_second": 3.923, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 9.654658550735641e-06, + "loss": 0.7843, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 9.144546679766793e-06, + "loss": 0.7863, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9039107732653265, + "eval_loss": 0.8022986650466919, + "eval_precision": 0.8998813619061209, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.7639, + "eval_samples_per_second": 464.714, + "eval_steps_per_second": 3.735, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 8.622390033509516e-06, + "loss": 0.7797, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 8.09122997864181e-06, + "loss": 0.7838, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9073506205504057, + "eval_loss": 0.7976791262626648, + "eval_precision": 0.9035099764092612, + "eval_recall": 0.914079822616408, + "eval_runtime": 7.6599, + "eval_samples_per_second": 471.023, + "eval_steps_per_second": 3.786, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 7.554160323320924e-06, + "loss": 0.777, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9083766271376222, + "eval_loss": 0.7962745428085327, + "eval_precision": 0.9046016901483156, + "eval_recall": 0.914079822616408, + "eval_runtime": 8.0528, + "eval_samples_per_second": 448.043, + "eval_steps_per_second": 3.601, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 7.014309296910907e-06, + "loss": 0.7813, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 6.474821329219502e-06, + "loss": 0.7748, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9085073379106339, + "eval_loss": 0.7997791171073914, + "eval_precision": 0.9048166033865582, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.6819, + "eval_samples_per_second": 469.674, + "eval_steps_per_second": 3.775, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 5.938838735373549e-06, + "loss": 0.7762, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 5.409483413011605e-06, + "loss": 0.7715, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9090867419321762, + "eval_loss": 0.7982298731803894, + "eval_precision": 0.9063582520937362, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.4392, + "eval_samples_per_second": 484.996, + "eval_steps_per_second": 3.898, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 4.889838658400776e-06, + "loss": 0.7715, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9115767820884764, + "eval_loss": 0.7987366318702698, + "eval_precision": 0.9076263627155844, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.8031, + "eval_samples_per_second": 462.379, + "eval_steps_per_second": 3.716, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.382931207392063e-06, + "loss": 0.7734, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.891713605818937e-06, + "loss": 0.773, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9137769642701411, + "eval_loss": 0.7950804829597473, + "eval_precision": 0.9104569277385008, + "eval_recall": 0.9190687361419069, + "eval_runtime": 8.1137, + "eval_samples_per_second": 444.679, + "eval_steps_per_second": 3.574, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.419047012024923e-06, + "loss": 0.7738, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9089176770179663, + "eval_loss": 0.8004584908485413, + "eval_precision": 0.9050447873720642, + "eval_recall": 0.9151884700665188, + "eval_runtime": 7.9567, + "eval_samples_per_second": 453.453, + "eval_steps_per_second": 3.645, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.967684531689074e-06, + "loss": 0.7694, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.5402551820176813e-06, + "loss": 0.7694, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9105315229815595, + "eval_loss": 0.7953075766563416, + "eval_precision": 0.9073112586511314, + "eval_recall": 0.9154656319290465, + "eval_runtime": 8.0218, + "eval_samples_per_second": 449.772, + "eval_steps_per_second": 3.615, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.139248578704814e-06, + "loss": 0.7676, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.767000434854411e-06, + "loss": 0.77, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9105919450119896, + "eval_loss": 0.7957642078399658, + "eval_precision": 0.9071463070942072, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.8504, + "eval_samples_per_second": 459.596, + "eval_steps_per_second": 3.694, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.4256789563272373e-06, + "loss": 0.768, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.91309593972845, + "eval_loss": 0.7954826951026917, + "eval_precision": 0.9095033411774633, + "eval_recall": 0.9187915742793792, + "eval_runtime": 8.0703, + "eval_samples_per_second": 447.071, + "eval_steps_per_second": 3.593, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.1172722127547217e-06, + "loss": 0.7654, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 8.43576557778811e-07, + "loss": 0.7706, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9129859450767841, + "eval_loss": 0.7928676605224609, + "eval_precision": 0.9093761209256284, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.7916, + "eval_samples_per_second": 463.062, + "eval_steps_per_second": 3.722, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 6.061861659655526e-07, + "loss": 0.772, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 4.0648374733599056e-07, + "loss": 0.7618, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9119842470491915, + "eval_loss": 0.7993268370628357, + "eval_precision": 0.9082416074735461, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.8608, + "eval_samples_per_second": 458.989, + "eval_steps_per_second": 3.689, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.456324935987598e-07, + "loss": 0.7658, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9229490022172949, + "eval_f1": 0.9176453274356926, + "eval_loss": 0.7931926846504211, + "eval_precision": 0.9137916760285146, + "eval_recall": 0.9229490022172949, + "eval_runtime": 7.6119, + "eval_samples_per_second": 473.997, + "eval_steps_per_second": 3.81, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.2456930299452522e-07, + "loss": 0.7722, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 4.3999323215026404e-08, + "loss": 0.7655, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9141648132209017, + "eval_loss": 0.7929724454879761, + "eval_precision": 0.9104752496332952, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.8863, + "eval_samples_per_second": 457.501, + "eval_steps_per_second": 3.677, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 4.3918441821762915e-09, + "loss": 0.7684, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9085437767939906, + "eval_loss": 0.7970999479293823, + "eval_precision": 0.90492982208666, + "eval_recall": 0.914079822616408, + "eval_runtime": 7.8353, + "eval_samples_per_second": 460.479, + "eval_steps_per_second": 3.701, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4089097060736896, + "learning_rate": 1.4153281972477753e-05, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-2ec3ndpb/checkpoint-630/training_args.bin b/run-2ec3ndpb/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..30c665f046f9939bc0b060de8a567455d0846e82 --- /dev/null +++ b/run-2ec3ndpb/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b63b18769431b6cafd0d6a30d20a7a96abbca92cc17828e90ecd455ed2e4804 +size 4792 diff --git a/run-2p4iktw6/checkpoint-573/model.safetensors b/run-2p4iktw6/checkpoint-573/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c60782056fc08205b796f4e1f014fdf070053287 --- /dev/null +++ b/run-2p4iktw6/checkpoint-573/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c07a634c4b5cc154c86b61cf14a4529b7b3605bea7e9020fabb0814438ffd04b +size 198025308 diff --git a/run-2p4iktw6/checkpoint-573/optimizer.pt b/run-2p4iktw6/checkpoint-573/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2795e50078e6259f44409d0c5f0931398966eff --- /dev/null +++ b/run-2p4iktw6/checkpoint-573/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5dfb8e9568de892efbddcc940886c2fe9acf4dd193ebc65d3de2f6358b33345 +size 395900602 diff --git a/run-2p4iktw6/checkpoint-573/rng_state.pth b/run-2p4iktw6/checkpoint-573/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e8713f9caaf617efce0d3935618a93ad2f5f391 --- /dev/null +++ b/run-2p4iktw6/checkpoint-573/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9102bb312b12c2313ea7585eb813beef5c548592778aaea4ab0516e14ecd38e5 +size 14244 diff --git a/run-2p4iktw6/checkpoint-573/scheduler.pt b/run-2p4iktw6/checkpoint-573/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..784d99bab4a25a7003667042dca3f025dfa231fe --- /dev/null +++ b/run-2p4iktw6/checkpoint-573/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c2a7e9e11999045fd21f497f1819752f36d15a8cadb2bb3701cc67c82030c79 +size 1064 diff --git a/run-2p4iktw6/checkpoint-573/trainer_state.json b/run-2p4iktw6/checkpoint-573/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d3104589e03cb7f4932954bee4a56f346dd82713 --- /dev/null +++ b/run-2p4iktw6/checkpoint-573/trainer_state.json @@ -0,0 +1,616 @@ +{ + "best_metric": 0.9171500454838738, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-2p4iktw6/checkpoint-573", + "epoch": 26.96470588235294, + "eval_steps": 500, + "global_step": 573, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 9.671660480014577e-06, + "loss": 1.4961, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8245565410199557, + "eval_f1": 0.7499760374885369, + "eval_loss": 1.1728752851486206, + "eval_precision": 0.6974809379726281, + "eval_recall": 0.8245565410199557, + "eval_runtime": 8.2611, + "eval_samples_per_second": 436.745, + "eval_steps_per_second": 3.51, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 1.9343320960029155e-05, + "loss": 1.2464, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 2.9014981440043734e-05, + "loss": 0.9809, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8378603104212861, + "eval_f1": 0.7732814771869145, + "eval_loss": 0.9319130778312683, + "eval_precision": 0.7879768633349686, + "eval_recall": 0.8378603104212861, + "eval_runtime": 8.1295, + "eval_samples_per_second": 443.815, + "eval_steps_per_second": 3.567, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 3.868664192005831e-05, + "loss": 0.8994, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8902439024390244, + "eval_f1": 0.8780555928007342, + "eval_loss": 0.885788083076477, + "eval_precision": 0.8844503612792898, + "eval_recall": 0.8902439024390244, + "eval_runtime": 8.3173, + "eval_samples_per_second": 433.795, + "eval_steps_per_second": 3.487, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 4.835830240007289e-05, + "loss": 0.8663, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 5.802996288008747e-05, + "loss": 0.8262, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.8974773466107481, + "eval_loss": 0.8151126503944397, + "eval_precision": 0.894196337416798, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.2055, + "eval_samples_per_second": 439.707, + "eval_steps_per_second": 3.534, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 6.770162336010204e-05, + "loss": 0.8129, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 7.737328384011662e-05, + "loss": 0.7912, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8442350332594235, + "eval_f1": 0.8591815405089629, + "eval_loss": 0.9065120220184326, + "eval_precision": 0.8924922991651157, + "eval_recall": 0.8442350332594235, + "eval_runtime": 8.314, + "eval_samples_per_second": 433.967, + "eval_steps_per_second": 3.488, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 8.704494432013121e-05, + "loss": 0.7753, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9054878048780488, + "eval_f1": 0.8922035583368066, + "eval_loss": 0.8154860138893127, + "eval_precision": 0.8968589693893608, + "eval_recall": 0.9054878048780488, + "eval_runtime": 8.236, + "eval_samples_per_second": 438.079, + "eval_steps_per_second": 3.521, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 9.372614111798525e-05, + "loss": 0.7768, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 9.34778045774312e-05, + "loss": 0.7697, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9109614404435225, + "eval_loss": 0.8117265701293945, + "eval_precision": 0.9106765560512459, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.1686, + "eval_samples_per_second": 441.69, + "eval_steps_per_second": 3.55, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 9.292359368575795e-05, + "loss": 0.7624, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 9.206714560606023e-05, + "loss": 0.7564, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8869179600886918, + "eval_f1": 0.8882318988191589, + "eval_loss": 0.8478553295135498, + "eval_precision": 0.8935435295528827, + "eval_recall": 0.8869179600886918, + "eval_runtime": 8.2647, + "eval_samples_per_second": 436.555, + "eval_steps_per_second": 3.509, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 9.091408101702702e-05, + "loss": 0.7533, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.8969843636243172, + "eval_loss": 0.8173887729644775, + "eval_precision": 0.9011406760484777, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.3958, + "eval_samples_per_second": 429.739, + "eval_steps_per_second": 3.454, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 8.947196722567463e-05, + "loss": 0.7471, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 8.775026850479e-05, + "loss": 0.7414, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9029933481152993, + "eval_f1": 0.9011683886128516, + "eval_loss": 0.8166917562484741, + "eval_precision": 0.8997527926099514, + "eval_recall": 0.9029933481152993, + "eval_runtime": 8.263, + "eval_samples_per_second": 436.646, + "eval_steps_per_second": 3.51, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 8.576028398100868e-05, + "loss": 0.7355, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.885809312638581, + "eval_f1": 0.8894948033604554, + "eval_loss": 0.8477791547775269, + "eval_precision": 0.8986121028049193, + "eval_recall": 0.885809312638581, + "eval_runtime": 8.1754, + "eval_samples_per_second": 441.325, + "eval_steps_per_second": 3.547, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 8.351507348115427e-05, + "loss": 0.7359, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 8.102937182349292e-05, + "loss": 0.7281, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9046843849567804, + "eval_loss": 0.8065732717514038, + "eval_precision": 0.9043960204543866, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.0818, + "eval_samples_per_second": 446.433, + "eval_steps_per_second": 3.588, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 7.831949211639113e-05, + "loss": 0.7303, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 7.54032186990055e-05, + "loss": 0.7219, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8702882483370288, + "eval_f1": 0.8788574444552584, + "eval_loss": 0.8701152801513672, + "eval_precision": 0.8975532644191331, + "eval_recall": 0.8702882483370288, + "eval_runtime": 8.1674, + "eval_samples_per_second": 441.755, + "eval_steps_per_second": 3.551, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 7.229969042661155e-05, + "loss": 0.7279, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9106615573410031, + "eval_loss": 0.8070887327194214, + "eval_precision": 0.9074740966588659, + "eval_recall": 0.9154656319290465, + "eval_runtime": 8.2694, + "eval_samples_per_second": 436.306, + "eval_steps_per_second": 3.507, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 6.902927506654365e-05, + "loss": 0.7195, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 6.561343562905793e-05, + "loss": 0.7189, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9141379676157104, + "eval_loss": 0.8022801876068115, + "eval_precision": 0.9138603736910345, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.9054, + "eval_samples_per_second": 456.399, + "eval_steps_per_second": 3.668, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 6.207458951035903e-05, + "loss": 0.7155, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 5.84359613722033e-05, + "loss": 0.7164, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.8983183671728243, + "eval_loss": 0.8196560144424438, + "eval_precision": 0.902842813643671, + "eval_recall": 0.9107538802660754, + "eval_runtime": 8.5083, + "eval_samples_per_second": 424.058, + "eval_steps_per_second": 3.408, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 5.4721430723597854e-05, + "loss": 0.7123, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.893569844789357, + "eval_f1": 0.8976341312656742, + "eval_loss": 0.8457297682762146, + "eval_precision": 0.9076392709636745, + "eval_recall": 0.893569844789357, + "eval_runtime": 8.4114, + "eval_samples_per_second": 428.942, + "eval_steps_per_second": 3.448, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 5.095537520488197e-05, + "loss": 0.7102, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 4.7162510602683344e-05, + "loss": 0.7097, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9136432115602193, + "eval_loss": 0.8054841160774231, + "eval_precision": 0.9116213211134261, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.209, + "eval_samples_per_second": 439.517, + "eval_steps_per_second": 3.533, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 4.336772864569531e-05, + "loss": 0.7101, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 3.9595933645785124e-05, + "loss": 0.7045, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9110574789669327, + "eval_loss": 0.8054888248443604, + "eval_precision": 0.9114025206515529, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.9751, + "eval_samples_per_second": 452.408, + "eval_steps_per_second": 3.636, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 3.587187905652197e-05, + "loss": 0.7095, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9117132382702443, + "eval_loss": 0.8040168285369873, + "eval_precision": 0.9084731779414078, + "eval_recall": 0.9185144124168514, + "eval_runtime": 8.2531, + "eval_samples_per_second": 437.171, + "eval_steps_per_second": 3.514, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 3.2220005021754103e-05, + "loss": 0.7024, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 2.8664277980368635e-05, + "loss": 0.6997, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9107222897486772, + "eval_loss": 0.8129555583000183, + "eval_precision": 0.9104628437817891, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.8415, + "eval_samples_per_second": 460.117, + "eval_steps_per_second": 3.698, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 2.522803337987148e-05, + "loss": 0.7033, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.909821372442197, + "eval_loss": 0.8067628145217896, + "eval_precision": 0.9076494624244972, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.9607, + "eval_samples_per_second": 453.226, + "eval_steps_per_second": 3.643, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.1933822531023823e-05, + "loss": 0.7022, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 1.8803264608594133e-05, + "loss": 0.6998, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9109273990680833, + "eval_loss": 0.8095477223396301, + "eval_precision": 0.9094140793033426, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.0457, + "eval_samples_per_second": 448.436, + "eval_steps_per_second": 3.604, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.585690476951173e-05, + "loss": 0.7, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.3114079319561077e-05, + "loss": 0.6996, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.915288285412851, + "eval_loss": 0.8015657663345337, + "eval_precision": 0.9138025920826565, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.187, + "eval_samples_per_second": 440.699, + "eval_steps_per_second": 3.542, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.0592788813497991e-05, + "loss": 0.6979, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9218403547671841, + "eval_f1": 0.9157345647914885, + "eval_loss": 0.804965078830719, + "eval_precision": 0.9148308975478764, + "eval_recall": 0.9218403547671841, + "eval_runtime": 7.9686, + "eval_samples_per_second": 452.776, + "eval_steps_per_second": 3.639, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 8.309579921403324e-06, + "loss": 0.6982, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 6.279436836559165e-06, + "loss": 0.6954, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9212860310421286, + "eval_f1": 0.9165629470219959, + "eval_loss": 0.7967893481254578, + "eval_precision": 0.9147617955927566, + "eval_recall": 0.9212860310421286, + "eval_runtime": 8.0467, + "eval_samples_per_second": 448.382, + "eval_steps_per_second": 3.604, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 4.51568293751356e-06, + "loss": 0.6958, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.0298933497036873e-06, + "loss": 0.6936, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9171500454838738, + "eval_loss": 0.8044392466545105, + "eval_precision": 0.9152909185140126, + "eval_recall": 0.9207317073170732, + "eval_runtime": 8.1313, + "eval_samples_per_second": 443.718, + "eval_steps_per_second": 3.566, + "step": 573 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.06393182605018903, + "learning_rate": 9.37407092678336e-05, + "metric": "eval/loss", + "weight_decay": 0.01325512089439292 + } +} diff --git a/run-2p4iktw6/checkpoint-573/training_args.bin b/run-2p4iktw6/checkpoint-573/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4920ed799507af8ca06297b8721f39f7df0121ea --- /dev/null +++ b/run-2p4iktw6/checkpoint-573/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c14d3eb2333c519baff3e0037f0c44fb15e0cfde78c070d2c11565f6c19c1c7 +size 4792 diff --git a/run-2p4iktw6/checkpoint-630/model.safetensors b/run-2p4iktw6/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a14ae0a506aa99e9719a55a0eaf901ec5db7c74e --- /dev/null +++ b/run-2p4iktw6/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6459cf5ea6806e89d4de71ed432c2819af98d2a0088ee55fad02c4ab66b94af +size 198025308 diff --git a/run-2p4iktw6/checkpoint-630/optimizer.pt b/run-2p4iktw6/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4d94f92bbb17fe87e3f2e8ad552ee7461fef8ea --- /dev/null +++ b/run-2p4iktw6/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8988f0b18414e15e68ec946262a898d4c55c2775fceca92c563eb8a1c51ebcf0 +size 395900602 diff --git a/run-2p4iktw6/checkpoint-630/rng_state.pth b/run-2p4iktw6/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-2p4iktw6/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-2p4iktw6/checkpoint-630/scheduler.pt b/run-2p4iktw6/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7ddd4a49b6da6f1e96814ae86a12fa33199885a8 --- /dev/null +++ b/run-2p4iktw6/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da2662e35a431705aabc6b91f711bd814bbcc4c25e99822843a2aed8658866c8 +size 1064 diff --git a/run-2p4iktw6/checkpoint-630/trainer_state.json b/run-2p4iktw6/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5710a6bebd7e56cce74c040b4b4be7dcccdc4c12 --- /dev/null +++ b/run-2p4iktw6/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9171500454838738, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-2p4iktw6/checkpoint-573", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 9.671660480014577e-06, + "loss": 1.4961, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8245565410199557, + "eval_f1": 0.7499760374885369, + "eval_loss": 1.1728752851486206, + "eval_precision": 0.6974809379726281, + "eval_recall": 0.8245565410199557, + "eval_runtime": 8.2611, + "eval_samples_per_second": 436.745, + "eval_steps_per_second": 3.51, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 1.9343320960029155e-05, + "loss": 1.2464, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 2.9014981440043734e-05, + "loss": 0.9809, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8378603104212861, + "eval_f1": 0.7732814771869145, + "eval_loss": 0.9319130778312683, + "eval_precision": 0.7879768633349686, + "eval_recall": 0.8378603104212861, + "eval_runtime": 8.1295, + "eval_samples_per_second": 443.815, + "eval_steps_per_second": 3.567, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 3.868664192005831e-05, + "loss": 0.8994, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8902439024390244, + "eval_f1": 0.8780555928007342, + "eval_loss": 0.885788083076477, + "eval_precision": 0.8844503612792898, + "eval_recall": 0.8902439024390244, + "eval_runtime": 8.3173, + "eval_samples_per_second": 433.795, + "eval_steps_per_second": 3.487, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 4.835830240007289e-05, + "loss": 0.8663, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 5.802996288008747e-05, + "loss": 0.8262, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.8974773466107481, + "eval_loss": 0.8151126503944397, + "eval_precision": 0.894196337416798, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.2055, + "eval_samples_per_second": 439.707, + "eval_steps_per_second": 3.534, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 6.770162336010204e-05, + "loss": 0.8129, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 7.737328384011662e-05, + "loss": 0.7912, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8442350332594235, + "eval_f1": 0.8591815405089629, + "eval_loss": 0.9065120220184326, + "eval_precision": 0.8924922991651157, + "eval_recall": 0.8442350332594235, + "eval_runtime": 8.314, + "eval_samples_per_second": 433.967, + "eval_steps_per_second": 3.488, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 8.704494432013121e-05, + "loss": 0.7753, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9054878048780488, + "eval_f1": 0.8922035583368066, + "eval_loss": 0.8154860138893127, + "eval_precision": 0.8968589693893608, + "eval_recall": 0.9054878048780488, + "eval_runtime": 8.236, + "eval_samples_per_second": 438.079, + "eval_steps_per_second": 3.521, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 9.372614111798525e-05, + "loss": 0.7768, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 9.34778045774312e-05, + "loss": 0.7697, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9109614404435225, + "eval_loss": 0.8117265701293945, + "eval_precision": 0.9106765560512459, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.1686, + "eval_samples_per_second": 441.69, + "eval_steps_per_second": 3.55, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 9.292359368575795e-05, + "loss": 0.7624, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 9.206714560606023e-05, + "loss": 0.7564, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8869179600886918, + "eval_f1": 0.8882318988191589, + "eval_loss": 0.8478553295135498, + "eval_precision": 0.8935435295528827, + "eval_recall": 0.8869179600886918, + "eval_runtime": 8.2647, + "eval_samples_per_second": 436.555, + "eval_steps_per_second": 3.509, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 9.091408101702702e-05, + "loss": 0.7533, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.8969843636243172, + "eval_loss": 0.8173887729644775, + "eval_precision": 0.9011406760484777, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.3958, + "eval_samples_per_second": 429.739, + "eval_steps_per_second": 3.454, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 8.947196722567463e-05, + "loss": 0.7471, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 8.775026850479e-05, + "loss": 0.7414, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9029933481152993, + "eval_f1": 0.9011683886128516, + "eval_loss": 0.8166917562484741, + "eval_precision": 0.8997527926099514, + "eval_recall": 0.9029933481152993, + "eval_runtime": 8.263, + "eval_samples_per_second": 436.646, + "eval_steps_per_second": 3.51, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 8.576028398100868e-05, + "loss": 0.7355, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.885809312638581, + "eval_f1": 0.8894948033604554, + "eval_loss": 0.8477791547775269, + "eval_precision": 0.8986121028049193, + "eval_recall": 0.885809312638581, + "eval_runtime": 8.1754, + "eval_samples_per_second": 441.325, + "eval_steps_per_second": 3.547, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 8.351507348115427e-05, + "loss": 0.7359, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 8.102937182349292e-05, + "loss": 0.7281, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9046843849567804, + "eval_loss": 0.8065732717514038, + "eval_precision": 0.9043960204543866, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.0818, + "eval_samples_per_second": 446.433, + "eval_steps_per_second": 3.588, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 7.831949211639113e-05, + "loss": 0.7303, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 7.54032186990055e-05, + "loss": 0.7219, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8702882483370288, + "eval_f1": 0.8788574444552584, + "eval_loss": 0.8701152801513672, + "eval_precision": 0.8975532644191331, + "eval_recall": 0.8702882483370288, + "eval_runtime": 8.1674, + "eval_samples_per_second": 441.755, + "eval_steps_per_second": 3.551, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 7.229969042661155e-05, + "loss": 0.7279, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9106615573410031, + "eval_loss": 0.8070887327194214, + "eval_precision": 0.9074740966588659, + "eval_recall": 0.9154656319290465, + "eval_runtime": 8.2694, + "eval_samples_per_second": 436.306, + "eval_steps_per_second": 3.507, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 6.902927506654365e-05, + "loss": 0.7195, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 6.561343562905793e-05, + "loss": 0.7189, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9141379676157104, + "eval_loss": 0.8022801876068115, + "eval_precision": 0.9138603736910345, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.9054, + "eval_samples_per_second": 456.399, + "eval_steps_per_second": 3.668, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 6.207458951035903e-05, + "loss": 0.7155, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 5.84359613722033e-05, + "loss": 0.7164, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.8983183671728243, + "eval_loss": 0.8196560144424438, + "eval_precision": 0.902842813643671, + "eval_recall": 0.9107538802660754, + "eval_runtime": 8.5083, + "eval_samples_per_second": 424.058, + "eval_steps_per_second": 3.408, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 5.4721430723597854e-05, + "loss": 0.7123, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.893569844789357, + "eval_f1": 0.8976341312656742, + "eval_loss": 0.8457297682762146, + "eval_precision": 0.9076392709636745, + "eval_recall": 0.893569844789357, + "eval_runtime": 8.4114, + "eval_samples_per_second": 428.942, + "eval_steps_per_second": 3.448, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 5.095537520488197e-05, + "loss": 0.7102, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 4.7162510602683344e-05, + "loss": 0.7097, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9136432115602193, + "eval_loss": 0.8054841160774231, + "eval_precision": 0.9116213211134261, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.209, + "eval_samples_per_second": 439.517, + "eval_steps_per_second": 3.533, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 4.336772864569531e-05, + "loss": 0.7101, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 3.9595933645785124e-05, + "loss": 0.7045, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9110574789669327, + "eval_loss": 0.8054888248443604, + "eval_precision": 0.9114025206515529, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.9751, + "eval_samples_per_second": 452.408, + "eval_steps_per_second": 3.636, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 3.587187905652197e-05, + "loss": 0.7095, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9117132382702443, + "eval_loss": 0.8040168285369873, + "eval_precision": 0.9084731779414078, + "eval_recall": 0.9185144124168514, + "eval_runtime": 8.2531, + "eval_samples_per_second": 437.171, + "eval_steps_per_second": 3.514, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 3.2220005021754103e-05, + "loss": 0.7024, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 2.8664277980368635e-05, + "loss": 0.6997, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9107222897486772, + "eval_loss": 0.8129555583000183, + "eval_precision": 0.9104628437817891, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.8415, + "eval_samples_per_second": 460.117, + "eval_steps_per_second": 3.698, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 2.522803337987148e-05, + "loss": 0.7033, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.909821372442197, + "eval_loss": 0.8067628145217896, + "eval_precision": 0.9076494624244972, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.9607, + "eval_samples_per_second": 453.226, + "eval_steps_per_second": 3.643, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.1933822531023823e-05, + "loss": 0.7022, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 1.8803264608594133e-05, + "loss": 0.6998, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9109273990680833, + "eval_loss": 0.8095477223396301, + "eval_precision": 0.9094140793033426, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.0457, + "eval_samples_per_second": 448.436, + "eval_steps_per_second": 3.604, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.585690476951173e-05, + "loss": 0.7, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.3114079319561077e-05, + "loss": 0.6996, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.915288285412851, + "eval_loss": 0.8015657663345337, + "eval_precision": 0.9138025920826565, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.187, + "eval_samples_per_second": 440.699, + "eval_steps_per_second": 3.542, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.0592788813497991e-05, + "loss": 0.6979, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9218403547671841, + "eval_f1": 0.9157345647914885, + "eval_loss": 0.804965078830719, + "eval_precision": 0.9148308975478764, + "eval_recall": 0.9218403547671841, + "eval_runtime": 7.9686, + "eval_samples_per_second": 452.776, + "eval_steps_per_second": 3.639, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 8.309579921403324e-06, + "loss": 0.6982, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 6.279436836559165e-06, + "loss": 0.6954, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9212860310421286, + "eval_f1": 0.9165629470219959, + "eval_loss": 0.7967893481254578, + "eval_precision": 0.9147617955927566, + "eval_recall": 0.9212860310421286, + "eval_runtime": 8.0467, + "eval_samples_per_second": 448.382, + "eval_steps_per_second": 3.604, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 4.51568293751356e-06, + "loss": 0.6958, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.0298933497036873e-06, + "loss": 0.6936, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9171500454838738, + "eval_loss": 0.8044392466545105, + "eval_precision": 0.9152909185140126, + "eval_recall": 0.9207317073170732, + "eval_runtime": 8.1313, + "eval_samples_per_second": 443.718, + "eval_steps_per_second": 3.566, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.8318189804763845e-06, + "loss": 0.6966, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9119604822109286, + "eval_loss": 0.8033170104026794, + "eval_precision": 0.9104227426555868, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.8391, + "eval_samples_per_second": 460.256, + "eval_steps_per_second": 3.699, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 9.293225260470455e-07, + "loss": 0.6962, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 3.2832687036920875e-07, + "loss": 0.6973, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9157126737529084, + "eval_loss": 0.8003931641578674, + "eval_precision": 0.912130007127053, + "eval_recall": 0.9207317073170732, + "eval_runtime": 8.1083, + "eval_samples_per_second": 444.975, + "eval_steps_per_second": 3.577, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 3.277621456155712e-08, + "loss": 0.6987, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9153488574068619, + "eval_loss": 0.8049823641777039, + "eval_precision": 0.9145494252636026, + "eval_recall": 0.91990022172949, + "eval_runtime": 8.0298, + "eval_samples_per_second": 449.324, + "eval_steps_per_second": 3.612, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.06393182605018903, + "learning_rate": 9.37407092678336e-05, + "metric": "eval/loss", + "weight_decay": 0.01325512089439292 + } +} diff --git a/run-2p4iktw6/checkpoint-630/training_args.bin b/run-2p4iktw6/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4920ed799507af8ca06297b8721f39f7df0121ea --- /dev/null +++ b/run-2p4iktw6/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c14d3eb2333c519baff3e0037f0c44fb15e0cfde78c070d2c11565f6c19c1c7 +size 4792 diff --git a/run-3ekxh3sa/checkpoint-1232/model.safetensors b/run-3ekxh3sa/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c2cab3387a4dd9f61ccba16c7c11def801b05421 --- /dev/null +++ b/run-3ekxh3sa/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbbda51d8554f62ae1cad6ed23ac02a35322436fffc55e2f84e033efbffef9a1 +size 198025308 diff --git a/run-3ekxh3sa/checkpoint-1232/optimizer.pt b/run-3ekxh3sa/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..48a44bd4dd0bf4b7834b778dd06ddaab3aad1992 --- /dev/null +++ b/run-3ekxh3sa/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b58c6890d65948a6a60819ebecfe504f06497a14c1dcba31863452d6eafa0468 +size 395900602 diff --git a/run-3ekxh3sa/checkpoint-1232/rng_state.pth b/run-3ekxh3sa/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-3ekxh3sa/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-3ekxh3sa/checkpoint-1232/scheduler.pt b/run-3ekxh3sa/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..612bbd863129b2f418f8745135635c630db2f7e0 --- /dev/null +++ b/run-3ekxh3sa/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:714661729b3f40f2c48af37ad23ef3ff09d35334cf7a1b207e2fc43c2d8cbd97 +size 1064 diff --git a/run-3ekxh3sa/checkpoint-1232/trainer_state.json b/run-3ekxh3sa/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..adb91ed9d94073faddf8c0d8108ebe5b61c70296 --- /dev/null +++ b/run-3ekxh3sa/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9243348115299335, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-3ekxh3sa/checkpoint-1147", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.721582447456803e-05, + "loss": 1.3991, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8295454545454546, + "eval_loss": 0.9406095743179321, + "eval_runtime": 6.7839, + "eval_samples_per_second": 531.849, + "eval_steps_per_second": 8.402, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 3.443164894913606e-05, + "loss": 0.975, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 5.16474734237041e-05, + "loss": 0.8697, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9049334811529933, + "eval_loss": 0.8338781595230103, + "eval_runtime": 6.896, + "eval_samples_per_second": 523.2, + "eval_steps_per_second": 8.266, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 6.886329789827213e-05, + "loss": 0.8206, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.8089684844017029, + "eval_runtime": 6.8879, + "eval_samples_per_second": 523.816, + "eval_steps_per_second": 8.275, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 8.607912237284016e-05, + "loss": 0.8023, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 9.931490397010198e-05, + "loss": 0.7878, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8339745402336121, + "eval_runtime": 6.9242, + "eval_samples_per_second": 521.075, + "eval_steps_per_second": 8.232, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 9.91185276336997e-05, + "loss": 0.7811, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 9.865445854143955e-05, + "loss": 0.7735, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8007537722587585, + "eval_runtime": 6.7698, + "eval_samples_per_second": 532.955, + "eval_steps_per_second": 8.42, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 9.792520850539316e-05, + "loss": 0.7664, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8072662949562073, + "eval_runtime": 6.9314, + "eval_samples_per_second": 520.533, + "eval_steps_per_second": 8.224, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 9.69347246512672e-05, + "loss": 0.7558, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 9.568836805426145e-05, + "loss": 0.7517, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.807176411151886, + "eval_runtime": 6.6089, + "eval_samples_per_second": 545.929, + "eval_steps_per_second": 8.625, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 9.419288472180897e-05, + "loss": 0.7486, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 9.245636908025691e-05, + "loss": 0.742, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8011844158172607, + "eval_runtime": 6.62, + "eval_samples_per_second": 545.017, + "eval_steps_per_second": 8.61, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 9.048822016311923e-05, + "loss": 0.7303, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9099223946784922, + "eval_loss": 0.8109549283981323, + "eval_runtime": 6.4309, + "eval_samples_per_second": 561.046, + "eval_steps_per_second": 8.864, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 8.829909073803584e-05, + "loss": 0.7404, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 8.590082964779303e-05, + "loss": 0.7277, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.7979661822319031, + "eval_runtime": 7.0683, + "eval_samples_per_second": 510.446, + "eval_steps_per_second": 8.064, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 8.330641767748885e-05, + "loss": 0.724, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.7978601455688477, + "eval_runtime": 6.6977, + "eval_samples_per_second": 538.694, + "eval_steps_per_second": 8.51, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 8.05298972949677e-05, + "loss": 0.7215, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 7.758629664480999e-05, + "loss": 0.7184, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.7982057929039001, + "eval_runtime": 6.7476, + "eval_samples_per_second": 534.71, + "eval_steps_per_second": 8.447, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 7.449154820726551e-05, + "loss": 0.7184, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 7.126240256239565e-05, + "loss": 0.7189, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.7983158826828003, + "eval_runtime": 6.7226, + "eval_samples_per_second": 536.698, + "eval_steps_per_second": 8.479, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 6.791633772618323e-05, + "loss": 0.7161, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.8047741055488586, + "eval_runtime": 6.9456, + "eval_samples_per_second": 519.466, + "eval_steps_per_second": 8.207, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 6.447146454933571e-05, + "loss": 0.7174, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 6.094642869081846e-05, + "loss": 0.7072, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8094056248664856, + "eval_runtime": 6.7463, + "eval_samples_per_second": 534.814, + "eval_steps_per_second": 8.449, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 5.736030969669442e-05, + "loss": 0.7104, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 5.3732517730514215e-05, + "loss": 0.7057, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8126114010810852, + "eval_runtime": 6.8439, + "eval_samples_per_second": 527.186, + "eval_steps_per_second": 8.329, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 5.008268851421187e-05, + "loss": 0.7075, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8077898621559143, + "eval_runtime": 6.6053, + "eval_samples_per_second": 546.231, + "eval_steps_per_second": 8.629, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 4.64305770481475e-05, + "loss": 0.7026, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 4.279595068554542e-05, + "loss": 0.7021, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.8008798360824585, + "eval_runtime": 6.9773, + "eval_samples_per_second": 517.108, + "eval_steps_per_second": 8.169, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 3.919848214007159e-05, + "loss": 0.7, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 3.565764300565529e-05, + "loss": 0.7027, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8068927526473999, + "eval_runtime": 6.8547, + "eval_samples_per_second": 526.354, + "eval_steps_per_second": 8.315, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 3.2192598364888074e-05, + "loss": 0.7046, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.7985545992851257, + "eval_runtime": 6.7345, + "eval_samples_per_second": 535.751, + "eval_steps_per_second": 8.464, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 2.8822103056440263e-05, + "loss": 0.6975, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 2.5564400162956554e-05, + "loss": 0.6948, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8047547340393066, + "eval_runtime": 6.9281, + "eval_samples_per_second": 520.781, + "eval_steps_per_second": 8.227, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 2.243712226887305e-05, + "loss": 0.6955, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8027793765068054, + "eval_runtime": 6.7629, + "eval_samples_per_second": 533.497, + "eval_steps_per_second": 8.428, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 1.9457196022605978e-05, + "loss": 0.6981, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 1.6640750519677137e-05, + "loss": 0.6955, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9223946784922394, + "eval_loss": 0.800028383731842, + "eval_runtime": 6.8203, + "eval_samples_per_second": 529.009, + "eval_steps_per_second": 8.357, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 1.400303000265985e-05, + "loss": 0.6957, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 1.1558311350464133e-05, + "loss": 0.6954, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9207317073170732, + "eval_loss": 0.797659158706665, + "eval_runtime": 6.7863, + "eval_samples_per_second": 531.656, + "eval_steps_per_second": 8.399, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 9.319826803557244e-06, + "loss": 0.6908, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.8041421175003052, + "eval_runtime": 6.6775, + "eval_samples_per_second": 540.324, + "eval_steps_per_second": 8.536, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 7.299692343375395e-06, + "loss": 0.6909, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 5.508842113579347e-06, + "loss": 0.6899, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8050050139427185, + "eval_runtime": 6.6773, + "eval_samples_per_second": 540.337, + "eval_steps_per_second": 8.536, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 3.956969238103917e-06, + "loss": 0.6924, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 2.6524733563286136e-06, + "loss": 0.6957, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9243348115299335, + "eval_loss": 0.797722578048706, + "eval_runtime": 6.823, + "eval_samples_per_second": 528.799, + "eval_steps_per_second": 8.354, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.6024151593397756e-06, + "loss": 0.6922, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9232261640798226, + "eval_loss": 0.794511079788208, + "eval_runtime": 6.8758, + "eval_samples_per_second": 524.736, + "eval_steps_per_second": 8.29, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 8.124781733602129e-07, + "loss": 0.6893, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 2.869379971967787e-07, + "loss": 0.6899, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8038355708122253, + "eval_runtime": 6.8749, + "eval_samples_per_second": 524.81, + "eval_steps_per_second": 8.291, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 9.932206427635402e-05, + "metric": "eval/loss", + "warmup_ratio": 0.11897323802421422 + } +} diff --git a/run-3ekxh3sa/checkpoint-1232/training_args.bin b/run-3ekxh3sa/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..48b23686253e099409218d1745663e3e404845bb --- /dev/null +++ b/run-3ekxh3sa/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dd747ca9278c35284ea1468244e655165f6ebf6c0475f32358eac1a56ba8cc6 +size 4792 diff --git a/run-3ekxh3sa/checkpoint-1260/model.safetensors b/run-3ekxh3sa/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77f5753a46cce22189e50ef35c989a04e0e0dca5 --- /dev/null +++ b/run-3ekxh3sa/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b473aa2a07f408fad5ed2cd9bdb8b24ce9c1ac102ce0e206d5e476b4bf71893 +size 198025308 diff --git a/run-3ekxh3sa/checkpoint-1260/optimizer.pt b/run-3ekxh3sa/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e3198578fbe5aadd04354002d8959d3a49c5696 --- /dev/null +++ b/run-3ekxh3sa/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7405e4e1e2dd046cb8ec722f8dc399d21a5aede5b00507d4cd946d7e138a433 +size 395900602 diff --git a/run-3ekxh3sa/checkpoint-1260/rng_state.pth b/run-3ekxh3sa/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-3ekxh3sa/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-3ekxh3sa/checkpoint-1260/scheduler.pt b/run-3ekxh3sa/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2118e91ba95d0a58c34b25a0dfcce23875235b93 --- /dev/null +++ b/run-3ekxh3sa/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a96d96afc1c0360dc2b4152d360fb4b9176870094b7816feabac9f3b29d243a +size 1064 diff --git a/run-3ekxh3sa/checkpoint-1260/trainer_state.json b/run-3ekxh3sa/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3bbff6b7706d7fca83870aab9782f7ce526f57ef --- /dev/null +++ b/run-3ekxh3sa/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9246119733924612, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-3ekxh3sa/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.721582447456803e-05, + "loss": 1.3991, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8295454545454546, + "eval_loss": 0.9406095743179321, + "eval_runtime": 6.7839, + "eval_samples_per_second": 531.849, + "eval_steps_per_second": 8.402, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 3.443164894913606e-05, + "loss": 0.975, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 5.16474734237041e-05, + "loss": 0.8697, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9049334811529933, + "eval_loss": 0.8338781595230103, + "eval_runtime": 6.896, + "eval_samples_per_second": 523.2, + "eval_steps_per_second": 8.266, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 6.886329789827213e-05, + "loss": 0.8206, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.8089684844017029, + "eval_runtime": 6.8879, + "eval_samples_per_second": 523.816, + "eval_steps_per_second": 8.275, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 8.607912237284016e-05, + "loss": 0.8023, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 9.931490397010198e-05, + "loss": 0.7878, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8339745402336121, + "eval_runtime": 6.9242, + "eval_samples_per_second": 521.075, + "eval_steps_per_second": 8.232, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 9.91185276336997e-05, + "loss": 0.7811, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 9.865445854143955e-05, + "loss": 0.7735, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8007537722587585, + "eval_runtime": 6.7698, + "eval_samples_per_second": 532.955, + "eval_steps_per_second": 8.42, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 9.792520850539316e-05, + "loss": 0.7664, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8072662949562073, + "eval_runtime": 6.9314, + "eval_samples_per_second": 520.533, + "eval_steps_per_second": 8.224, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 9.69347246512672e-05, + "loss": 0.7558, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 9.568836805426145e-05, + "loss": 0.7517, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.807176411151886, + "eval_runtime": 6.6089, + "eval_samples_per_second": 545.929, + "eval_steps_per_second": 8.625, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 9.419288472180897e-05, + "loss": 0.7486, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 9.245636908025691e-05, + "loss": 0.742, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8011844158172607, + "eval_runtime": 6.62, + "eval_samples_per_second": 545.017, + "eval_steps_per_second": 8.61, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 9.048822016311923e-05, + "loss": 0.7303, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9099223946784922, + "eval_loss": 0.8109549283981323, + "eval_runtime": 6.4309, + "eval_samples_per_second": 561.046, + "eval_steps_per_second": 8.864, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 8.829909073803584e-05, + "loss": 0.7404, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 8.590082964779303e-05, + "loss": 0.7277, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.7979661822319031, + "eval_runtime": 7.0683, + "eval_samples_per_second": 510.446, + "eval_steps_per_second": 8.064, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 8.330641767748885e-05, + "loss": 0.724, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.7978601455688477, + "eval_runtime": 6.6977, + "eval_samples_per_second": 538.694, + "eval_steps_per_second": 8.51, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 8.05298972949677e-05, + "loss": 0.7215, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 7.758629664480999e-05, + "loss": 0.7184, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.7982057929039001, + "eval_runtime": 6.7476, + "eval_samples_per_second": 534.71, + "eval_steps_per_second": 8.447, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 7.449154820726551e-05, + "loss": 0.7184, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 7.126240256239565e-05, + "loss": 0.7189, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.7983158826828003, + "eval_runtime": 6.7226, + "eval_samples_per_second": 536.698, + "eval_steps_per_second": 8.479, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 6.791633772618323e-05, + "loss": 0.7161, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.8047741055488586, + "eval_runtime": 6.9456, + "eval_samples_per_second": 519.466, + "eval_steps_per_second": 8.207, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 6.447146454933571e-05, + "loss": 0.7174, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 6.094642869081846e-05, + "loss": 0.7072, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8094056248664856, + "eval_runtime": 6.7463, + "eval_samples_per_second": 534.814, + "eval_steps_per_second": 8.449, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 5.736030969669442e-05, + "loss": 0.7104, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 5.3732517730514215e-05, + "loss": 0.7057, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8126114010810852, + "eval_runtime": 6.8439, + "eval_samples_per_second": 527.186, + "eval_steps_per_second": 8.329, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 5.008268851421187e-05, + "loss": 0.7075, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8077898621559143, + "eval_runtime": 6.6053, + "eval_samples_per_second": 546.231, + "eval_steps_per_second": 8.629, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 4.64305770481475e-05, + "loss": 0.7026, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 4.279595068554542e-05, + "loss": 0.7021, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.8008798360824585, + "eval_runtime": 6.9773, + "eval_samples_per_second": 517.108, + "eval_steps_per_second": 8.169, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 3.919848214007159e-05, + "loss": 0.7, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 3.565764300565529e-05, + "loss": 0.7027, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8068927526473999, + "eval_runtime": 6.8547, + "eval_samples_per_second": 526.354, + "eval_steps_per_second": 8.315, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 3.2192598364888074e-05, + "loss": 0.7046, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.7985545992851257, + "eval_runtime": 6.7345, + "eval_samples_per_second": 535.751, + "eval_steps_per_second": 8.464, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 2.8822103056440263e-05, + "loss": 0.6975, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 2.5564400162956554e-05, + "loss": 0.6948, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8047547340393066, + "eval_runtime": 6.9281, + "eval_samples_per_second": 520.781, + "eval_steps_per_second": 8.227, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 2.243712226887305e-05, + "loss": 0.6955, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8027793765068054, + "eval_runtime": 6.7629, + "eval_samples_per_second": 533.497, + "eval_steps_per_second": 8.428, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 1.9457196022605978e-05, + "loss": 0.6981, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 1.6640750519677137e-05, + "loss": 0.6955, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9223946784922394, + "eval_loss": 0.800028383731842, + "eval_runtime": 6.8203, + "eval_samples_per_second": 529.009, + "eval_steps_per_second": 8.357, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 1.400303000265985e-05, + "loss": 0.6957, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 1.1558311350464133e-05, + "loss": 0.6954, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9207317073170732, + "eval_loss": 0.797659158706665, + "eval_runtime": 6.7863, + "eval_samples_per_second": 531.656, + "eval_steps_per_second": 8.399, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 9.319826803557244e-06, + "loss": 0.6908, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.8041421175003052, + "eval_runtime": 6.6775, + "eval_samples_per_second": 540.324, + "eval_steps_per_second": 8.536, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 7.299692343375395e-06, + "loss": 0.6909, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 5.508842113579347e-06, + "loss": 0.6899, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8050050139427185, + "eval_runtime": 6.6773, + "eval_samples_per_second": 540.337, + "eval_steps_per_second": 8.536, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 3.956969238103917e-06, + "loss": 0.6924, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 2.6524733563286136e-06, + "loss": 0.6957, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9243348115299335, + "eval_loss": 0.797722578048706, + "eval_runtime": 6.823, + "eval_samples_per_second": 528.799, + "eval_steps_per_second": 8.354, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.6024151593397756e-06, + "loss": 0.6922, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9232261640798226, + "eval_loss": 0.794511079788208, + "eval_runtime": 6.8758, + "eval_samples_per_second": 524.736, + "eval_steps_per_second": 8.29, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 8.124781733602129e-07, + "loss": 0.6893, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 2.869379971967787e-07, + "loss": 0.6899, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8038355708122253, + "eval_runtime": 6.8749, + "eval_samples_per_second": 524.81, + "eval_steps_per_second": 8.291, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 2.8639160210733933e-08, + "loss": 0.693, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9246119733924612, + "eval_loss": 0.7983114123344421, + "eval_runtime": 7.0129, + "eval_samples_per_second": 514.48, + "eval_steps_per_second": 8.128, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 9.932206427635402e-05, + "metric": "eval/loss", + "warmup_ratio": 0.11897323802421422 + } +} diff --git a/run-3ekxh3sa/checkpoint-1260/training_args.bin b/run-3ekxh3sa/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..48b23686253e099409218d1745663e3e404845bb --- /dev/null +++ b/run-3ekxh3sa/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dd747ca9278c35284ea1468244e655165f6ebf6c0475f32358eac1a56ba8cc6 +size 4792 diff --git a/run-4abf38ea/checkpoint-1232/model.safetensors b/run-4abf38ea/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3cf37e7000de7efb7fb52dd449689811f85c1c3f --- /dev/null +++ b/run-4abf38ea/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d47e715309373591cb76591ab811bbce9d7befa3e3d9158748dc4e3bd2f5ccdd +size 198025308 diff --git a/run-4abf38ea/checkpoint-1232/optimizer.pt b/run-4abf38ea/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2a0aac8ab101dd69e9eb690b538903f7b753d5f --- /dev/null +++ b/run-4abf38ea/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a63d39d35fd3896ccca54d7358940d44831a637edd9788ef8520a3ba8e16169d +size 395900602 diff --git a/run-4abf38ea/checkpoint-1232/rng_state.pth b/run-4abf38ea/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-4abf38ea/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-4abf38ea/checkpoint-1232/scheduler.pt b/run-4abf38ea/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a1da7c4ec5a082c088dd00021b3863b539dfa3e --- /dev/null +++ b/run-4abf38ea/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:004ba669d413a028f5267e4feb45e55b63925ba749391b19d5ad4d633a936216 +size 1064 diff --git a/run-4abf38ea/checkpoint-1232/trainer_state.json b/run-4abf38ea/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7970f2636062b35f041245c5850028630e167e5b --- /dev/null +++ b/run-4abf38ea/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9185144124168514, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-4abf38ea/checkpoint-1232", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.286903708210888e-05, + "loss": 1.3076, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8702882483370288, + "eval_loss": 0.9072703719139099, + "eval_runtime": 6.8142, + "eval_samples_per_second": 529.48, + "eval_steps_per_second": 8.365, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 6.573807416421776e-05, + "loss": 0.917, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 9.860711124632664e-05, + "loss": 0.8338, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8249408602714539, + "eval_runtime": 6.8163, + "eval_samples_per_second": 529.316, + "eval_steps_per_second": 8.362, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00013147614832843553, + "loss": 0.8073, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8930155210643016, + "eval_loss": 0.8433846235275269, + "eval_runtime": 6.925, + "eval_samples_per_second": 521.013, + "eval_steps_per_second": 8.231, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00016434518541054443, + "loss": 0.7965, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001972142224926533, + "loss": 0.7816, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8977272727272727, + "eval_loss": 0.836298406124115, + "eval_runtime": 6.5445, + "eval_samples_per_second": 551.3, + "eval_steps_per_second": 8.71, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00023008325957476217, + "loss": 0.7798, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00026295229665687105, + "loss": 0.775, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.8228710889816284, + "eval_runtime": 6.4962, + "eval_samples_per_second": 555.4, + "eval_steps_per_second": 8.774, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00029582133373897996, + "loss": 0.7779, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8960643015521065, + "eval_loss": 0.8428367972373962, + "eval_runtime": 6.9299, + "eval_samples_per_second": 520.642, + "eval_steps_per_second": 8.225, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00032869037082108887, + "loss": 0.7661, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0003615594079031977, + "loss": 0.7646, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8700110864745011, + "eval_loss": 0.8653149008750916, + "eval_runtime": 6.3456, + "eval_samples_per_second": 568.587, + "eval_steps_per_second": 8.983, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0003944284449853066, + "loss": 0.7686, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00040287226793887186, + "loss": 0.767, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8855321507760532, + "eval_loss": 0.8640886545181274, + "eval_runtime": 6.6648, + "eval_samples_per_second": 541.35, + "eval_steps_per_second": 8.552, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00040100650983161467, + "loss": 0.7647, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8874722838137472, + "eval_loss": 0.8541760444641113, + "eval_runtime": 6.9721, + "eval_samples_per_second": 517.494, + "eval_steps_per_second": 8.175, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00039763951531864575, + "loss": 0.7687, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00039279663784007107, + "loss": 0.7621, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8816518847006651, + "eval_loss": 0.8915682435035706, + "eval_runtime": 6.8818, + "eval_samples_per_second": 524.278, + "eval_steps_per_second": 8.283, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00038651434422220127, + "loss": 0.7569, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8805432372505543, + "eval_loss": 0.8563141226768494, + "eval_runtime": 6.812, + "eval_samples_per_second": 529.655, + "eval_steps_per_second": 8.368, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00037883994008265924, + "loss": 0.7551, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00036983121361948076, + "loss": 0.7479, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8968957871396895, + "eval_loss": 0.841917872428894, + "eval_runtime": 6.8898, + "eval_samples_per_second": 523.673, + "eval_steps_per_second": 8.273, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0003595560004664737, + "loss": 0.7465, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0003480916728914715, + "loss": 0.7432, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8927383592017738, + "eval_loss": 0.852349042892456, + "eval_runtime": 6.9248, + "eval_samples_per_second": 521.026, + "eval_steps_per_second": 8.231, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0003355245571838127, + "loss": 0.7382, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8101441241685144, + "eval_loss": 0.9869778752326965, + "eval_runtime": 6.9108, + "eval_samples_per_second": 522.084, + "eval_steps_per_second": 8.248, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00032194928361811497, + "loss": 0.746, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00030746807388911074, + "loss": 0.7329, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8561529933481153, + "eval_loss": 0.9042383432388306, + "eval_runtime": 6.9025, + "eval_samples_per_second": 522.711, + "eval_steps_per_second": 8.258, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0002921899713831528, + "loss": 0.7405, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00027623002008244133, + "loss": 0.7265, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8985587583148559, + "eval_loss": 0.8415153622627258, + "eval_runtime": 6.9639, + "eval_samples_per_second": 518.102, + "eval_steps_per_second": 8.185, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00025970839828481375, + "loss": 0.727, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8222659230232239, + "eval_runtime": 7.191, + "eval_samples_per_second": 501.74, + "eval_steps_per_second": 7.927, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00024274951366218213, + "loss": 0.7194, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00022548106647181986, + "loss": 0.7164, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8195874094963074, + "eval_runtime": 6.5352, + "eval_samples_per_second": 552.084, + "eval_steps_per_second": 8.722, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00020803308797450766, + "loss": 0.713, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00019053696130024423, + "loss": 0.7076, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8270972967147827, + "eval_runtime": 6.7989, + "eval_samples_per_second": 530.676, + "eval_steps_per_second": 8.384, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00017312443213439175, + "loss": 0.7155, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9104767184035477, + "eval_loss": 0.8176755309104919, + "eval_runtime": 6.8087, + "eval_samples_per_second": 529.909, + "eval_steps_per_second": 8.372, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00015592661667378379, + "loss": 0.7073, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00013907301432287557, + "loss": 0.7062, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8150787949562073, + "eval_runtime": 6.6679, + "eval_samples_per_second": 541.101, + "eval_steps_per_second": 8.548, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.0001226905325643279, + "loss": 0.6982, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8196025490760803, + "eval_runtime": 6.5892, + "eval_samples_per_second": 547.561, + "eval_steps_per_second": 8.65, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.00010690253134673957, + "loss": 0.7002, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 9.182789418528163e-05, + "loss": 0.7, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8116112947463989, + "eval_runtime": 6.6412, + "eval_samples_per_second": 543.276, + "eval_steps_per_second": 8.583, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 7.758013296983661e-05, + "loss": 0.6987, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 6.426653322143199e-05, + "loss": 0.6943, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9099223946784922, + "eval_loss": 0.824604868888855, + "eval_runtime": 6.7252, + "eval_samples_per_second": 536.487, + "eval_steps_per_second": 8.476, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 5.198734623317661e-05, + "loss": 0.6908, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8131895065307617, + "eval_runtime": 6.751, + "eval_samples_per_second": 534.439, + "eval_steps_per_second": 8.443, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 4.083503417887494e-05, + "loss": 0.6931, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 3.089357387364271e-05, + "loss": 0.6899, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8158280253410339, + "eval_runtime": 6.8648, + "eval_samples_per_second": 525.579, + "eval_steps_per_second": 8.303, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 2.223782442920231e-05, + "loss": 0.6913, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.493296356540634e-05, + "loss": 0.6961, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8066160678863525, + "eval_runtime": 6.8803, + "eval_samples_per_second": 524.392, + "eval_steps_per_second": 8.284, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 9.033996822560643e-06, + "loss": 0.6906, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8079826831817627, + "eval_runtime": 6.8936, + "eval_samples_per_second": 523.381, + "eval_steps_per_second": 8.268, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 4.585343370172514e-06, + "loss": 0.6842, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.6205015309814372e-06, + "loss": 0.6874, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8057360649108887, + "eval_runtime": 6.6802, + "eval_samples_per_second": 540.1, + "eval_steps_per_second": 8.533, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00040327780112279745, + "metric": "eval/loss", + "warmup_ratio": 0.25309968089828383 + } +} diff --git a/run-4abf38ea/checkpoint-1232/training_args.bin b/run-4abf38ea/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7c626f1fefeb813babfc7e6e7848ea026648708 --- /dev/null +++ b/run-4abf38ea/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86beedd2cfc10ab95b250403ad99aa1d907f7d26ae867466def7b89af97c83d1 +size 4792 diff --git a/run-4abf38ea/checkpoint-1260/model.safetensors b/run-4abf38ea/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5169dd3dd8fb8cca0f4f837d267706976f7d6a96 --- /dev/null +++ b/run-4abf38ea/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09cd20d321c5812022ea600dd208f0fa0d9fa60d75f43f558d46d2cfd384c228 +size 198025308 diff --git a/run-4abf38ea/checkpoint-1260/optimizer.pt b/run-4abf38ea/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d682acf39d0936c5c7a1122f2e49fd567fd2d39 --- /dev/null +++ b/run-4abf38ea/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62bb3ee3b46a9aeaf1b3460e64e5a1103baa949aae3c6de73d57ad42c8533aea +size 395900602 diff --git a/run-4abf38ea/checkpoint-1260/rng_state.pth b/run-4abf38ea/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-4abf38ea/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-4abf38ea/checkpoint-1260/scheduler.pt b/run-4abf38ea/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..80fb18cd248e680f268ad4ca0db94d0a15243bc5 --- /dev/null +++ b/run-4abf38ea/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe34800d7dfa29df7bf8a02a64f931ae7e21fee469a4c3f2a31b87ec0595ac05 +size 1064 diff --git a/run-4abf38ea/checkpoint-1260/trainer_state.json b/run-4abf38ea/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..92d6482e379553a237bfacc590d357c6ff088246 --- /dev/null +++ b/run-4abf38ea/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9187915742793792, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-4abf38ea/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.286903708210888e-05, + "loss": 1.3076, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8702882483370288, + "eval_loss": 0.9072703719139099, + "eval_runtime": 6.8142, + "eval_samples_per_second": 529.48, + "eval_steps_per_second": 8.365, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 6.573807416421776e-05, + "loss": 0.917, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 9.860711124632664e-05, + "loss": 0.8338, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8249408602714539, + "eval_runtime": 6.8163, + "eval_samples_per_second": 529.316, + "eval_steps_per_second": 8.362, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00013147614832843553, + "loss": 0.8073, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8930155210643016, + "eval_loss": 0.8433846235275269, + "eval_runtime": 6.925, + "eval_samples_per_second": 521.013, + "eval_steps_per_second": 8.231, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00016434518541054443, + "loss": 0.7965, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001972142224926533, + "loss": 0.7816, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8977272727272727, + "eval_loss": 0.836298406124115, + "eval_runtime": 6.5445, + "eval_samples_per_second": 551.3, + "eval_steps_per_second": 8.71, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00023008325957476217, + "loss": 0.7798, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00026295229665687105, + "loss": 0.775, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.8228710889816284, + "eval_runtime": 6.4962, + "eval_samples_per_second": 555.4, + "eval_steps_per_second": 8.774, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00029582133373897996, + "loss": 0.7779, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8960643015521065, + "eval_loss": 0.8428367972373962, + "eval_runtime": 6.9299, + "eval_samples_per_second": 520.642, + "eval_steps_per_second": 8.225, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00032869037082108887, + "loss": 0.7661, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0003615594079031977, + "loss": 0.7646, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8700110864745011, + "eval_loss": 0.8653149008750916, + "eval_runtime": 6.3456, + "eval_samples_per_second": 568.587, + "eval_steps_per_second": 8.983, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0003944284449853066, + "loss": 0.7686, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00040287226793887186, + "loss": 0.767, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8855321507760532, + "eval_loss": 0.8640886545181274, + "eval_runtime": 6.6648, + "eval_samples_per_second": 541.35, + "eval_steps_per_second": 8.552, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00040100650983161467, + "loss": 0.7647, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8874722838137472, + "eval_loss": 0.8541760444641113, + "eval_runtime": 6.9721, + "eval_samples_per_second": 517.494, + "eval_steps_per_second": 8.175, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00039763951531864575, + "loss": 0.7687, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00039279663784007107, + "loss": 0.7621, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8816518847006651, + "eval_loss": 0.8915682435035706, + "eval_runtime": 6.8818, + "eval_samples_per_second": 524.278, + "eval_steps_per_second": 8.283, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00038651434422220127, + "loss": 0.7569, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8805432372505543, + "eval_loss": 0.8563141226768494, + "eval_runtime": 6.812, + "eval_samples_per_second": 529.655, + "eval_steps_per_second": 8.368, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00037883994008265924, + "loss": 0.7551, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00036983121361948076, + "loss": 0.7479, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8968957871396895, + "eval_loss": 0.841917872428894, + "eval_runtime": 6.8898, + "eval_samples_per_second": 523.673, + "eval_steps_per_second": 8.273, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0003595560004664737, + "loss": 0.7465, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0003480916728914715, + "loss": 0.7432, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8927383592017738, + "eval_loss": 0.852349042892456, + "eval_runtime": 6.9248, + "eval_samples_per_second": 521.026, + "eval_steps_per_second": 8.231, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0003355245571838127, + "loss": 0.7382, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8101441241685144, + "eval_loss": 0.9869778752326965, + "eval_runtime": 6.9108, + "eval_samples_per_second": 522.084, + "eval_steps_per_second": 8.248, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00032194928361811497, + "loss": 0.746, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00030746807388911074, + "loss": 0.7329, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8561529933481153, + "eval_loss": 0.9042383432388306, + "eval_runtime": 6.9025, + "eval_samples_per_second": 522.711, + "eval_steps_per_second": 8.258, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0002921899713831528, + "loss": 0.7405, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00027623002008244133, + "loss": 0.7265, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8985587583148559, + "eval_loss": 0.8415153622627258, + "eval_runtime": 6.9639, + "eval_samples_per_second": 518.102, + "eval_steps_per_second": 8.185, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00025970839828481375, + "loss": 0.727, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8222659230232239, + "eval_runtime": 7.191, + "eval_samples_per_second": 501.74, + "eval_steps_per_second": 7.927, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00024274951366218213, + "loss": 0.7194, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00022548106647181986, + "loss": 0.7164, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8195874094963074, + "eval_runtime": 6.5352, + "eval_samples_per_second": 552.084, + "eval_steps_per_second": 8.722, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00020803308797450766, + "loss": 0.713, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00019053696130024423, + "loss": 0.7076, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8270972967147827, + "eval_runtime": 6.7989, + "eval_samples_per_second": 530.676, + "eval_steps_per_second": 8.384, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00017312443213439175, + "loss": 0.7155, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9104767184035477, + "eval_loss": 0.8176755309104919, + "eval_runtime": 6.8087, + "eval_samples_per_second": 529.909, + "eval_steps_per_second": 8.372, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00015592661667378379, + "loss": 0.7073, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00013907301432287557, + "loss": 0.7062, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8150787949562073, + "eval_runtime": 6.6679, + "eval_samples_per_second": 541.101, + "eval_steps_per_second": 8.548, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.0001226905325643279, + "loss": 0.6982, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8196025490760803, + "eval_runtime": 6.5892, + "eval_samples_per_second": 547.561, + "eval_steps_per_second": 8.65, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.00010690253134673957, + "loss": 0.7002, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 9.182789418528163e-05, + "loss": 0.7, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8116112947463989, + "eval_runtime": 6.6412, + "eval_samples_per_second": 543.276, + "eval_steps_per_second": 8.583, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 7.758013296983661e-05, + "loss": 0.6987, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 6.426653322143199e-05, + "loss": 0.6943, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9099223946784922, + "eval_loss": 0.824604868888855, + "eval_runtime": 6.7252, + "eval_samples_per_second": 536.487, + "eval_steps_per_second": 8.476, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 5.198734623317661e-05, + "loss": 0.6908, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8131895065307617, + "eval_runtime": 6.751, + "eval_samples_per_second": 534.439, + "eval_steps_per_second": 8.443, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 4.083503417887494e-05, + "loss": 0.6931, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 3.089357387364271e-05, + "loss": 0.6899, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8158280253410339, + "eval_runtime": 6.8648, + "eval_samples_per_second": 525.579, + "eval_steps_per_second": 8.303, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 2.223782442920231e-05, + "loss": 0.6913, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.493296356540634e-05, + "loss": 0.6961, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8066160678863525, + "eval_runtime": 6.8803, + "eval_samples_per_second": 524.392, + "eval_steps_per_second": 8.284, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 9.033996822560643e-06, + "loss": 0.6906, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8079826831817627, + "eval_runtime": 6.8936, + "eval_samples_per_second": 523.381, + "eval_steps_per_second": 8.268, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 4.585343370172514e-06, + "loss": 0.6842, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.6205015309814372e-06, + "loss": 0.6874, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8057360649108887, + "eval_runtime": 6.6802, + "eval_samples_per_second": 540.1, + "eval_steps_per_second": 8.533, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 1.6179653887293934e-07, + "loss": 0.6898, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8059198260307312, + "eval_runtime": 6.8071, + "eval_samples_per_second": 530.034, + "eval_steps_per_second": 8.374, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00040327780112279745, + "metric": "eval/loss", + "warmup_ratio": 0.25309968089828383 + } +} diff --git a/run-4abf38ea/checkpoint-1260/training_args.bin b/run-4abf38ea/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7c626f1fefeb813babfc7e6e7848ea026648708 --- /dev/null +++ b/run-4abf38ea/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86beedd2cfc10ab95b250403ad99aa1d907f7d26ae867466def7b89af97c83d1 +size 4792 diff --git a/run-4jbqjjlr/checkpoint-616/model.safetensors b/run-4jbqjjlr/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0267a71ffe96dfff8b031bf8602f66d955b75a33 --- /dev/null +++ b/run-4jbqjjlr/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:379940c7a2859fc16bd0bde05a13f3be0b5b0595a5405ddef1a385ac8a7fd5df +size 198025308 diff --git a/run-4jbqjjlr/checkpoint-616/optimizer.pt b/run-4jbqjjlr/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c5b78adf331b02ff7cae494619a73ad314fbb1a --- /dev/null +++ b/run-4jbqjjlr/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9aa04f333a78b5487e3797b8907e936ae02bae20c0a7223582c8dc23f2adf5c1 +size 395900602 diff --git a/run-4jbqjjlr/checkpoint-616/rng_state.pth b/run-4jbqjjlr/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-4jbqjjlr/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-4jbqjjlr/checkpoint-616/scheduler.pt b/run-4jbqjjlr/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8ff82aa382660eae4608af87f88ce94a67010ec --- /dev/null +++ b/run-4jbqjjlr/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d55de599da5c1593a1688349535918a0a09a7134a5e779ffc89447120529bcc0 +size 1064 diff --git a/run-4jbqjjlr/checkpoint-616/trainer_state.json b/run-4jbqjjlr/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..38c57f6e3c4a6b275586dd175bd9a916bd81178a --- /dev/null +++ b/run-4jbqjjlr/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9201702473219616, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-4jbqjjlr/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 5.584304035084041e-05, + "loss": 1.3263, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.1340593099594116, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.2879, + "eval_samples_per_second": 435.335, + "eval_steps_per_second": 3.499, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00011168608070168083, + "loss": 0.9747, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00016752912105252123, + "loss": 0.8388, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8863636363636364, + "eval_f1": 0.8871781963717755, + "eval_loss": 0.9012882113456726, + "eval_precision": 0.8953558900339356, + "eval_recall": 0.8863636363636364, + "eval_runtime": 7.7488, + "eval_samples_per_second": 465.622, + "eval_steps_per_second": 3.743, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00022337216140336165, + "loss": 0.7992, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9083954967705045, + "eval_loss": 0.806678056716919, + "eval_precision": 0.9067337052687803, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.6352, + "eval_samples_per_second": 472.546, + "eval_steps_per_second": 3.798, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.000279215201754202, + "loss": 0.7907, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00033505824210504245, + "loss": 0.7745, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8489467849223947, + "eval_f1": 0.8617584879932982, + "eval_loss": 0.9207568764686584, + "eval_precision": 0.8955829571877814, + "eval_recall": 0.8489467849223947, + "eval_runtime": 7.9798, + "eval_samples_per_second": 452.144, + "eval_steps_per_second": 3.634, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003909012824558828, + "loss": 0.782, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0004077988741334023, + "loss": 0.7673, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8888580931263859, + "eval_f1": 0.8764212456097316, + "eval_loss": 0.8633736968040466, + "eval_precision": 0.8804401888922926, + "eval_recall": 0.8888580931263859, + "eval_runtime": 7.7827, + "eval_samples_per_second": 463.592, + "eval_steps_per_second": 3.726, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00040638346730980583, + "loss": 0.7681, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9027161862527716, + "eval_f1": 0.8940868404895402, + "eval_loss": 0.8288395404815674, + "eval_precision": 0.8908902959860563, + "eval_recall": 0.9027161862527716, + "eval_runtime": 7.9226, + "eval_samples_per_second": 455.403, + "eval_steps_per_second": 3.66, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0004037894965991336, + "loss": 0.7634, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0004000320709081471, + "loss": 0.7562, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9040864736671298, + "eval_loss": 0.8155009150505066, + "eval_precision": 0.8997425594966963, + "eval_recall": 0.9110310421286031, + "eval_runtime": 8.0972, + "eval_samples_per_second": 445.584, + "eval_steps_per_second": 3.581, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00039513307583262207, + "loss": 0.7519, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0003891210461819592, + "loss": 0.7424, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9002217294900222, + "eval_f1": 0.8914585796210459, + "eval_loss": 0.8367924094200134, + "eval_precision": 0.9005007368726486, + "eval_recall": 0.9002217294900222, + "eval_runtime": 8.0022, + "eval_samples_per_second": 450.878, + "eval_steps_per_second": 3.624, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00038203099977461927, + "loss": 0.7362, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9013303769401331, + "eval_f1": 0.8942853586253209, + "eval_loss": 0.8291991949081421, + "eval_precision": 0.8909823228016912, + "eval_recall": 0.9013303769401331, + "eval_runtime": 7.6839, + "eval_samples_per_second": 469.553, + "eval_steps_per_second": 3.774, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00037390423347246016, + "loss": 0.736, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00036478808264200143, + "loss": 0.74, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8924611973392461, + "eval_f1": 0.8729723068044194, + "eval_loss": 0.8576189279556274, + "eval_precision": 0.8849567381914036, + "eval_recall": 0.8924611973392461, + "eval_runtime": 7.6985, + "eval_samples_per_second": 468.664, + "eval_steps_per_second": 3.767, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0003547356454436631, + "loss": 0.7354, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8328713968957872, + "eval_f1": 0.8479373180069812, + "eval_loss": 0.930422306060791, + "eval_precision": 0.883590138998415, + "eval_recall": 0.8328713968957872, + "eval_runtime": 8.0959, + "eval_samples_per_second": 445.659, + "eval_steps_per_second": 3.582, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0003438054735548905, + "loss": 0.7387, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00033206123112858776, + "loss": 0.7226, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9013303769401331, + "eval_f1": 0.8920105624695288, + "eval_loss": 0.8393056392669678, + "eval_precision": 0.898043903473596, + "eval_recall": 0.9013303769401331, + "eval_runtime": 7.8733, + "eval_samples_per_second": 458.256, + "eval_steps_per_second": 3.683, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0003195713239732986, + "loss": 0.7183, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0003064085011150203, + "loss": 0.7146, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.7912971175166297, + "eval_f1": 0.8180268983845741, + "eval_loss": 0.9896758794784546, + "eval_precision": 0.883582965101536, + "eval_recall": 0.7912971175166297, + "eval_runtime": 7.3365, + "eval_samples_per_second": 491.791, + "eval_steps_per_second": 3.953, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0002926494310614055, + "loss": 0.7162, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.9001578899731278, + "eval_loss": 0.8248401880264282, + "eval_precision": 0.8973150619358384, + "eval_recall": 0.907150776053215, + "eval_runtime": 8.1408, + "eval_samples_per_second": 443.199, + "eval_steps_per_second": 3.562, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00027837425523645276, + "loss": 0.7117, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0002636661211867605, + "loss": 0.7103, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9038248337028825, + "eval_f1": 0.9005643209003605, + "eval_loss": 0.8278890252113342, + "eval_precision": 0.8976190492719515, + "eval_recall": 0.9038248337028825, + "eval_runtime": 7.3274, + "eval_samples_per_second": 492.397, + "eval_steps_per_second": 3.958, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0002486106982782458, + "loss": 0.707, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00023329567870420973, + "loss": 0.7113, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.8984711081030063, + "eval_loss": 0.8273665904998779, + "eval_precision": 0.9009745599385749, + "eval_recall": 0.9090909090909091, + "eval_runtime": 7.8195, + "eval_samples_per_second": 461.411, + "eval_steps_per_second": 3.709, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00021781026671119228, + "loss": 0.7065, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9058479713675404, + "eval_loss": 0.8171899914741516, + "eval_precision": 0.9029562443780125, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.9034, + "eval_samples_per_second": 456.51, + "eval_steps_per_second": 3.669, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00020224465901768433, + "loss": 0.7037, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00018668951945206199, + "loss": 0.6997, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.907636434121447, + "eval_loss": 0.8178182244300842, + "eval_precision": 0.9052636428679987, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.7601, + "eval_samples_per_second": 464.945, + "eval_steps_per_second": 3.737, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.0001712354508697815, + "loss": 0.6984, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00015597246742571856, + "loss": 0.6998, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9041652532641681, + "eval_loss": 0.8142445683479309, + "eval_precision": 0.9002070630508643, + "eval_recall": 0.9118625277161863, + "eval_runtime": 7.8804, + "eval_samples_per_second": 457.846, + "eval_steps_per_second": 3.68, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.00014098947027546755, + "loss": 0.697, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.8994325585302008, + "eval_loss": 0.8266851902008057, + "eval_precision": 0.8982727493061794, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.5517, + "eval_samples_per_second": 477.774, + "eval_steps_per_second": 3.84, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00012637372975944408, + "loss": 0.694, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.00011221037708587531, + "loss": 0.6906, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9054587294862068, + "eval_loss": 0.8134133815765381, + "eval_precision": 0.9035078575508294, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.7027, + "eval_samples_per_second": 468.405, + "eval_steps_per_second": 3.765, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 9.858190847343184e-05, + "loss": 0.6908, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9124237923617543, + "eval_loss": 0.8123248815536499, + "eval_precision": 0.9103662480998526, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.256, + "eval_samples_per_second": 497.245, + "eval_steps_per_second": 3.997, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 8.556770464168718e-05, + "loss": 0.6913, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 7.324356844819031e-05, + "loss": 0.6896, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9119567482837282, + "eval_loss": 0.8087641596794128, + "eval_precision": 0.9109670351337799, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.027, + "eval_samples_per_second": 449.483, + "eval_steps_per_second": 3.613, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 6.168128336524313e-05, + "loss": 0.6881, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 5.094819536809049e-05, + "loss": 0.6891, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9093655215368961, + "eval_loss": 0.8144046068191528, + "eval_precision": 0.9079335179083904, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.2281, + "eval_samples_per_second": 438.498, + "eval_steps_per_second": 3.525, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 4.110682066986595e-05, + "loss": 0.6877, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9123528160112668, + "eval_loss": 0.81013023853302, + "eval_precision": 0.9105675951961423, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.6918, + "eval_samples_per_second": 469.074, + "eval_steps_per_second": 3.77, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 3.221448158809106e-05, + "loss": 0.6846, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 2.43229726636696e-05, + "loss": 0.687, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9130445836616415, + "eval_loss": 0.805779218673706, + "eval_precision": 0.9109436419297111, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.3344, + "eval_samples_per_second": 491.931, + "eval_steps_per_second": 3.954, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.7478258977107345e-05, + "loss": 0.6849, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 1.1720208419152941e-05, + "loss": 0.6845, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9089185511320002, + "eval_loss": 0.8081110119819641, + "eval_precision": 0.9058164709617514, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.9019, + "eval_samples_per_second": 456.597, + "eval_steps_per_second": 3.67, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 7.0823594752833e-06, + "loss": 0.6858, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.914727181441485, + "eval_loss": 0.8074522018432617, + "eval_precision": 0.9122508492208068, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.1424, + "eval_samples_per_second": 443.111, + "eval_steps_per_second": 3.562, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 3.59172587660107e-06, + "loss": 0.6851, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.268639254979937e-06, + "loss": 0.6856, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9237804878048781, + "eval_f1": 0.9201702473219616, + "eval_loss": 0.7972337603569031, + "eval_precision": 0.9181208978191403, + "eval_recall": 0.9237804878048781, + "eval_runtime": 8.0125, + "eval_samples_per_second": 450.299, + "eval_steps_per_second": 3.619, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.0902354288194084, + "learning_rate": 0.0004080837564099876, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-4jbqjjlr/checkpoint-616/training_args.bin b/run-4jbqjjlr/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fbe310fc4415f87cd830ee68abf143a6de3aad6a --- /dev/null +++ b/run-4jbqjjlr/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37f7b9046ddb3b212b9a304e6f91f5d347686bbad86abff303d155b23ff786c7 +size 4792 diff --git a/run-4jbqjjlr/checkpoint-630/model.safetensors b/run-4jbqjjlr/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..22e304e3e3087b58996ac1502d5cfac671be6912 --- /dev/null +++ b/run-4jbqjjlr/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684ce9719453779615af573797ce5a6bf01ed929d069d5f7c8e1f3efe9c308b +size 198025308 diff --git a/run-4jbqjjlr/checkpoint-630/optimizer.pt b/run-4jbqjjlr/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ac91a7abd1710645ad43678979cd47ae4c5be14 --- /dev/null +++ b/run-4jbqjjlr/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9789b72aa97691351fd169bfa90a26774fe0e4397effa37fbbcaae59c4925391 +size 395900602 diff --git a/run-4jbqjjlr/checkpoint-630/rng_state.pth b/run-4jbqjjlr/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-4jbqjjlr/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-4jbqjjlr/checkpoint-630/scheduler.pt b/run-4jbqjjlr/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4132e26b73d09f2ea598604e837af31c4d1028f --- /dev/null +++ b/run-4jbqjjlr/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f32152e74621cb85f79cc3f01640115c9ccc58b81d33a5a8d217933c1640289 +size 1064 diff --git a/run-4jbqjjlr/checkpoint-630/trainer_state.json b/run-4jbqjjlr/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a0159944c0870e00e8e380345cf6177155778fe0 --- /dev/null +++ b/run-4jbqjjlr/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9201702473219616, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-4jbqjjlr/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 5.584304035084041e-05, + "loss": 1.3263, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.1340593099594116, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.2879, + "eval_samples_per_second": 435.335, + "eval_steps_per_second": 3.499, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00011168608070168083, + "loss": 0.9747, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00016752912105252123, + "loss": 0.8388, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8863636363636364, + "eval_f1": 0.8871781963717755, + "eval_loss": 0.9012882113456726, + "eval_precision": 0.8953558900339356, + "eval_recall": 0.8863636363636364, + "eval_runtime": 7.7488, + "eval_samples_per_second": 465.622, + "eval_steps_per_second": 3.743, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00022337216140336165, + "loss": 0.7992, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9083954967705045, + "eval_loss": 0.806678056716919, + "eval_precision": 0.9067337052687803, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.6352, + "eval_samples_per_second": 472.546, + "eval_steps_per_second": 3.798, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.000279215201754202, + "loss": 0.7907, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00033505824210504245, + "loss": 0.7745, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8489467849223947, + "eval_f1": 0.8617584879932982, + "eval_loss": 0.9207568764686584, + "eval_precision": 0.8955829571877814, + "eval_recall": 0.8489467849223947, + "eval_runtime": 7.9798, + "eval_samples_per_second": 452.144, + "eval_steps_per_second": 3.634, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003909012824558828, + "loss": 0.782, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0004077988741334023, + "loss": 0.7673, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8888580931263859, + "eval_f1": 0.8764212456097316, + "eval_loss": 0.8633736968040466, + "eval_precision": 0.8804401888922926, + "eval_recall": 0.8888580931263859, + "eval_runtime": 7.7827, + "eval_samples_per_second": 463.592, + "eval_steps_per_second": 3.726, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00040638346730980583, + "loss": 0.7681, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9027161862527716, + "eval_f1": 0.8940868404895402, + "eval_loss": 0.8288395404815674, + "eval_precision": 0.8908902959860563, + "eval_recall": 0.9027161862527716, + "eval_runtime": 7.9226, + "eval_samples_per_second": 455.403, + "eval_steps_per_second": 3.66, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0004037894965991336, + "loss": 0.7634, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0004000320709081471, + "loss": 0.7562, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9040864736671298, + "eval_loss": 0.8155009150505066, + "eval_precision": 0.8997425594966963, + "eval_recall": 0.9110310421286031, + "eval_runtime": 8.0972, + "eval_samples_per_second": 445.584, + "eval_steps_per_second": 3.581, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00039513307583262207, + "loss": 0.7519, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0003891210461819592, + "loss": 0.7424, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9002217294900222, + "eval_f1": 0.8914585796210459, + "eval_loss": 0.8367924094200134, + "eval_precision": 0.9005007368726486, + "eval_recall": 0.9002217294900222, + "eval_runtime": 8.0022, + "eval_samples_per_second": 450.878, + "eval_steps_per_second": 3.624, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00038203099977461927, + "loss": 0.7362, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9013303769401331, + "eval_f1": 0.8942853586253209, + "eval_loss": 0.8291991949081421, + "eval_precision": 0.8909823228016912, + "eval_recall": 0.9013303769401331, + "eval_runtime": 7.6839, + "eval_samples_per_second": 469.553, + "eval_steps_per_second": 3.774, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00037390423347246016, + "loss": 0.736, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00036478808264200143, + "loss": 0.74, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8924611973392461, + "eval_f1": 0.8729723068044194, + "eval_loss": 0.8576189279556274, + "eval_precision": 0.8849567381914036, + "eval_recall": 0.8924611973392461, + "eval_runtime": 7.6985, + "eval_samples_per_second": 468.664, + "eval_steps_per_second": 3.767, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0003547356454436631, + "loss": 0.7354, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8328713968957872, + "eval_f1": 0.8479373180069812, + "eval_loss": 0.930422306060791, + "eval_precision": 0.883590138998415, + "eval_recall": 0.8328713968957872, + "eval_runtime": 8.0959, + "eval_samples_per_second": 445.659, + "eval_steps_per_second": 3.582, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0003438054735548905, + "loss": 0.7387, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00033206123112858776, + "loss": 0.7226, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9013303769401331, + "eval_f1": 0.8920105624695288, + "eval_loss": 0.8393056392669678, + "eval_precision": 0.898043903473596, + "eval_recall": 0.9013303769401331, + "eval_runtime": 7.8733, + "eval_samples_per_second": 458.256, + "eval_steps_per_second": 3.683, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0003195713239732986, + "loss": 0.7183, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0003064085011150203, + "loss": 0.7146, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.7912971175166297, + "eval_f1": 0.8180268983845741, + "eval_loss": 0.9896758794784546, + "eval_precision": 0.883582965101536, + "eval_recall": 0.7912971175166297, + "eval_runtime": 7.3365, + "eval_samples_per_second": 491.791, + "eval_steps_per_second": 3.953, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0002926494310614055, + "loss": 0.7162, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.9001578899731278, + "eval_loss": 0.8248401880264282, + "eval_precision": 0.8973150619358384, + "eval_recall": 0.907150776053215, + "eval_runtime": 8.1408, + "eval_samples_per_second": 443.199, + "eval_steps_per_second": 3.562, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00027837425523645276, + "loss": 0.7117, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0002636661211867605, + "loss": 0.7103, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9038248337028825, + "eval_f1": 0.9005643209003605, + "eval_loss": 0.8278890252113342, + "eval_precision": 0.8976190492719515, + "eval_recall": 0.9038248337028825, + "eval_runtime": 7.3274, + "eval_samples_per_second": 492.397, + "eval_steps_per_second": 3.958, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0002486106982782458, + "loss": 0.707, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00023329567870420973, + "loss": 0.7113, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.8984711081030063, + "eval_loss": 0.8273665904998779, + "eval_precision": 0.9009745599385749, + "eval_recall": 0.9090909090909091, + "eval_runtime": 7.8195, + "eval_samples_per_second": 461.411, + "eval_steps_per_second": 3.709, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00021781026671119228, + "loss": 0.7065, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9058479713675404, + "eval_loss": 0.8171899914741516, + "eval_precision": 0.9029562443780125, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.9034, + "eval_samples_per_second": 456.51, + "eval_steps_per_second": 3.669, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00020224465901768433, + "loss": 0.7037, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00018668951945206199, + "loss": 0.6997, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.907636434121447, + "eval_loss": 0.8178182244300842, + "eval_precision": 0.9052636428679987, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.7601, + "eval_samples_per_second": 464.945, + "eval_steps_per_second": 3.737, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.0001712354508697815, + "loss": 0.6984, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00015597246742571856, + "loss": 0.6998, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9041652532641681, + "eval_loss": 0.8142445683479309, + "eval_precision": 0.9002070630508643, + "eval_recall": 0.9118625277161863, + "eval_runtime": 7.8804, + "eval_samples_per_second": 457.846, + "eval_steps_per_second": 3.68, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.00014098947027546755, + "loss": 0.697, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.8994325585302008, + "eval_loss": 0.8266851902008057, + "eval_precision": 0.8982727493061794, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.5517, + "eval_samples_per_second": 477.774, + "eval_steps_per_second": 3.84, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00012637372975944408, + "loss": 0.694, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.00011221037708587531, + "loss": 0.6906, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9054587294862068, + "eval_loss": 0.8134133815765381, + "eval_precision": 0.9035078575508294, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.7027, + "eval_samples_per_second": 468.405, + "eval_steps_per_second": 3.765, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 9.858190847343184e-05, + "loss": 0.6908, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9124237923617543, + "eval_loss": 0.8123248815536499, + "eval_precision": 0.9103662480998526, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.256, + "eval_samples_per_second": 497.245, + "eval_steps_per_second": 3.997, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 8.556770464168718e-05, + "loss": 0.6913, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 7.324356844819031e-05, + "loss": 0.6896, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9119567482837282, + "eval_loss": 0.8087641596794128, + "eval_precision": 0.9109670351337799, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.027, + "eval_samples_per_second": 449.483, + "eval_steps_per_second": 3.613, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 6.168128336524313e-05, + "loss": 0.6881, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 5.094819536809049e-05, + "loss": 0.6891, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9093655215368961, + "eval_loss": 0.8144046068191528, + "eval_precision": 0.9079335179083904, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.2281, + "eval_samples_per_second": 438.498, + "eval_steps_per_second": 3.525, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 4.110682066986595e-05, + "loss": 0.6877, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9123528160112668, + "eval_loss": 0.81013023853302, + "eval_precision": 0.9105675951961423, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.6918, + "eval_samples_per_second": 469.074, + "eval_steps_per_second": 3.77, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 3.221448158809106e-05, + "loss": 0.6846, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 2.43229726636696e-05, + "loss": 0.687, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9130445836616415, + "eval_loss": 0.805779218673706, + "eval_precision": 0.9109436419297111, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.3344, + "eval_samples_per_second": 491.931, + "eval_steps_per_second": 3.954, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.7478258977107345e-05, + "loss": 0.6849, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 1.1720208419152941e-05, + "loss": 0.6845, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9089185511320002, + "eval_loss": 0.8081110119819641, + "eval_precision": 0.9058164709617514, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.9019, + "eval_samples_per_second": 456.597, + "eval_steps_per_second": 3.67, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 7.0823594752833e-06, + "loss": 0.6858, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.914727181441485, + "eval_loss": 0.8074522018432617, + "eval_precision": 0.9122508492208068, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.1424, + "eval_samples_per_second": 443.111, + "eval_steps_per_second": 3.562, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 3.59172587660107e-06, + "loss": 0.6851, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.268639254979937e-06, + "loss": 0.6856, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9237804878048781, + "eval_f1": 0.9201702473219616, + "eval_loss": 0.7972337603569031, + "eval_precision": 0.9181208978191403, + "eval_recall": 0.9237804878048781, + "eval_runtime": 8.0125, + "eval_samples_per_second": 450.299, + "eval_steps_per_second": 3.619, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 1.266307189325425e-07, + "loss": 0.682, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9133489713232321, + "eval_loss": 0.8121137619018555, + "eval_precision": 0.9121604940063871, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.9129, + "eval_samples_per_second": 455.967, + "eval_steps_per_second": 3.665, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.0902354288194084, + "learning_rate": 0.0004080837564099876, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-4jbqjjlr/checkpoint-630/training_args.bin b/run-4jbqjjlr/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fbe310fc4415f87cd830ee68abf143a6de3aad6a --- /dev/null +++ b/run-4jbqjjlr/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37f7b9046ddb3b212b9a304e6f91f5d347686bbad86abff303d155b23ff786c7 +size 4792 diff --git a/run-53jz7fuv/checkpoint-552/model.safetensors b/run-53jz7fuv/checkpoint-552/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a81d1935e32f560ba9e536e22773c65cc8b52653 --- /dev/null +++ b/run-53jz7fuv/checkpoint-552/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c0abd3e78415361e86f94a972c198c158399cf0e0093134cf8dd1e113ee6e81 +size 198025308 diff --git a/run-53jz7fuv/checkpoint-552/optimizer.pt b/run-53jz7fuv/checkpoint-552/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8643b7a7810a92339edafe02494e9ce85148827c --- /dev/null +++ b/run-53jz7fuv/checkpoint-552/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe4dbd9977fffb2c8244a15cca646b5ea45cb77aad47eaf210fbcaf21bae1927 +size 395900602 diff --git a/run-53jz7fuv/checkpoint-552/rng_state.pth b/run-53jz7fuv/checkpoint-552/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a07d02214d4d2a0bd650d84451df8b01ad9e2e1f --- /dev/null +++ b/run-53jz7fuv/checkpoint-552/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ea0e7f2a9ffdc1f2f52e0b770bd1a5190fc0a00c767b73b57597c52b6f4dee6 +size 14244 diff --git a/run-53jz7fuv/checkpoint-552/scheduler.pt b/run-53jz7fuv/checkpoint-552/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d34afc42190b9d8aba56c8d9ebdb95179d1557ed --- /dev/null +++ b/run-53jz7fuv/checkpoint-552/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8a7ef14a23a0169669280b34a58d1e5c6156e13fca1aa5cf208dfce5ab05424 +size 1064 diff --git a/run-53jz7fuv/checkpoint-552/trainer_state.json b/run-53jz7fuv/checkpoint-552/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3e5a256eb13a356a76f9a913b5476650e7e09b3f --- /dev/null +++ b/run-53jz7fuv/checkpoint-552/trainer_state.json @@ -0,0 +1,592 @@ +{ + "best_metric": 0.9143976714084776, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-53jz7fuv/checkpoint-552", + "epoch": 25.976470588235294, + "eval_steps": 500, + "global_step": 552, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 7.378869092534831e-05, + "loss": 1.2995, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.1503448486328125, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.3107, + "eval_samples_per_second": 434.142, + "eval_steps_per_second": 3.489, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00014757738185069662, + "loss": 0.9464, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00022136607277604495, + "loss": 0.8285, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8406319290465631, + "eval_f1": 0.852736263314779, + "eval_loss": 0.947006344795227, + "eval_precision": 0.8854687716818802, + "eval_recall": 0.8406319290465631, + "eval_runtime": 8.0818, + "eval_samples_per_second": 446.436, + "eval_steps_per_second": 3.588, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00029515476370139325, + "loss": 0.8, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.895307019390465, + "eval_loss": 0.8166604042053223, + "eval_precision": 0.895600373172129, + "eval_recall": 0.9068736141906873, + "eval_runtime": 8.0961, + "eval_samples_per_second": 445.647, + "eval_steps_per_second": 3.582, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003689434546267416, + "loss": 0.7924, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004427321455520899, + "loss": 0.7794, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6599223946784922, + "eval_f1": 0.7102920067865718, + "eval_loss": 1.1635165214538574, + "eval_precision": 0.8726549957647445, + "eval_recall": 0.6599223946784922, + "eval_runtime": 7.6332, + "eval_samples_per_second": 472.674, + "eval_steps_per_second": 3.799, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0005165208364774382, + "loss": 0.796, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0005388486173762167, + "loss": 0.7757, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8805432372505543, + "eval_f1": 0.8773334790511929, + "eval_loss": 0.879237949848175, + "eval_precision": 0.8864864928693271, + "eval_recall": 0.8805432372505543, + "eval_runtime": 7.9933, + "eval_samples_per_second": 451.379, + "eval_steps_per_second": 3.628, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0005369783571614465, + "loss": 0.778, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.875, + "eval_f1": 0.856802615183484, + "eval_loss": 0.8734626173973083, + "eval_precision": 0.8631760709471038, + "eval_recall": 0.875, + "eval_runtime": 8.2713, + "eval_samples_per_second": 436.207, + "eval_steps_per_second": 3.506, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0005335507912224025, + "loss": 0.7731, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0005285858838454891, + "loss": 0.7696, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.7804878048780488, + "eval_f1": 0.8094952278698527, + "eval_loss": 1.005456566810608, + "eval_precision": 0.8749780641719764, + "eval_recall": 0.7804878048780488, + "eval_runtime": 8.458, + "eval_samples_per_second": 426.578, + "eval_steps_per_second": 3.429, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0005221125537545482, + "loss": 0.7617, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.000514168505670137, + "loss": 0.7535, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.871119733924612, + "eval_f1": 0.8637350836189582, + "eval_loss": 0.8737017512321472, + "eval_precision": 0.8691820561567153, + "eval_recall": 0.871119733924612, + "eval_runtime": 8.2672, + "eval_samples_per_second": 436.425, + "eval_steps_per_second": 3.508, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0005048000106936684, + "loss": 0.7492, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8957871396895787, + "eval_f1": 0.8877932865479055, + "eval_loss": 0.8430231809616089, + "eval_precision": 0.8895626497267765, + "eval_recall": 0.8957871396895787, + "eval_runtime": 8.0069, + "eval_samples_per_second": 450.61, + "eval_steps_per_second": 3.622, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0004940616367955944, + "loss": 0.7515, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0004820159309774396, + "loss": 0.7474, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8905210643015521, + "eval_f1": 0.8876570329441318, + "eval_loss": 0.8475670218467712, + "eval_precision": 0.8977710495990978, + "eval_recall": 0.8905210643015521, + "eval_runtime": 7.9501, + "eval_samples_per_second": 453.829, + "eval_steps_per_second": 3.648, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.000468733054958969, + "loss": 0.7451, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9029933481152993, + "eval_f1": 0.9014412435203255, + "eval_loss": 0.8295616507530212, + "eval_precision": 0.9006666247452533, + "eval_recall": 0.9029933481152993, + "eval_runtime": 8.1207, + "eval_samples_per_second": 444.296, + "eval_steps_per_second": 3.571, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0004542903765124787, + "loss": 0.7454, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0004387720188245314, + "loss": 0.7353, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8921840354767184, + "eval_f1": 0.8924850696189655, + "eval_loss": 0.8424952030181885, + "eval_precision": 0.8978571365485917, + "eval_recall": 0.8921840354767184, + "eval_runtime": 7.8206, + "eval_samples_per_second": 461.348, + "eval_steps_per_second": 3.708, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00042226837050993773, + "loss": 0.7265, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0004048755591319665, + "loss": 0.7277, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8190133037694013, + "eval_f1": 0.8406730878971063, + "eval_loss": 0.9540680050849915, + "eval_precision": 0.8835019013446076, + "eval_recall": 0.8190133037694013, + "eval_runtime": 8.3485, + "eval_samples_per_second": 432.175, + "eval_steps_per_second": 3.474, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00038669489129533217, + "loss": 0.7298, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8988359201773836, + "eval_f1": 0.8936437037552686, + "eval_loss": 0.831924319267273, + "eval_precision": 0.8965423225960021, + "eval_recall": 0.8988359201773836, + "eval_runtime": 8.2585, + "eval_samples_per_second": 436.883, + "eval_steps_per_second": 3.512, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.000367832262573209, + "loss": 0.7231, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00034839754070522253, + "loss": 0.7201, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9044669532947774, + "eval_loss": 0.824361264705658, + "eval_precision": 0.9031627762580688, + "eval_recall": 0.9088137472283814, + "eval_runtime": 7.9484, + "eval_samples_per_second": 453.927, + "eval_steps_per_second": 3.649, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00032850392565906243, + "loss": 0.7173, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00030826729028311626, + "loss": 0.7214, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9016075388026608, + "eval_f1": 0.8900885379859285, + "eval_loss": 0.8331730961799622, + "eval_precision": 0.8899670184070819, + "eval_recall": 0.9016075388026608, + "eval_runtime": 7.2986, + "eval_samples_per_second": 494.342, + "eval_steps_per_second": 3.973, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00028780550539057414, + "loss": 0.7114, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9054871730935844, + "eval_loss": 0.8191584348678589, + "eval_precision": 0.9037609511867994, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.1753, + "eval_samples_per_second": 441.328, + "eval_steps_per_second": 3.547, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0002672377532061393, + "loss": 0.7104, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00024668383317425673, + "loss": 0.7059, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9077050997782705, + "eval_f1": 0.9035577355537484, + "eval_loss": 0.8239393830299377, + "eval_precision": 0.9018666514340472, + "eval_recall": 0.9077050997782705, + "eval_runtime": 7.8397, + "eval_samples_per_second": 460.223, + "eval_steps_per_second": 3.699, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.0002262634641722694, + "loss": 0.7032, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00020609558719284705, + "loss": 0.7044, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9036268750748488, + "eval_loss": 0.8207970857620239, + "eval_precision": 0.9059613649467493, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.7152, + "eval_samples_per_second": 467.647, + "eval_steps_per_second": 3.759, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.0001862976725573018, + "loss": 0.7015, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9021618625277162, + "eval_f1": 0.9009925002634688, + "eval_loss": 0.8348780870437622, + "eval_precision": 0.9005397621027742, + "eval_recall": 0.9021618625277162, + "eval_runtime": 7.6248, + "eval_samples_per_second": 473.196, + "eval_steps_per_second": 3.803, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00016698503569501257, + "loss": 0.7032, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.00014827016547428785, + "loss": 0.6938, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9103315892112055, + "eval_loss": 0.8124563694000244, + "eval_precision": 0.9085347933280395, + "eval_recall": 0.9129711751662971, + "eval_runtime": 7.881, + "eval_samples_per_second": 457.807, + "eval_steps_per_second": 3.68, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.00013026206899688556, + "loss": 0.6972, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9105816259077874, + "eval_loss": 0.8168894052505493, + "eval_precision": 0.9081968011566729, + "eval_recall": 0.9138026607538803, + "eval_runtime": 8.28, + "eval_samples_per_second": 435.748, + "eval_steps_per_second": 3.502, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00011306563667251915, + "loss": 0.6957, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 9.678103127154989e-05, + "loss": 0.6928, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9105142347847764, + "eval_loss": 0.8113499283790588, + "eval_precision": 0.9105825329516584, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.8537, + "eval_samples_per_second": 459.402, + "eval_steps_per_second": 3.693, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 8.15031045144063e-05, + "loss": 0.691, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 6.732084459587836e-05, + "loss": 0.6919, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9087349483916861, + "eval_loss": 0.8187626600265503, + "eval_precision": 0.9075196437317545, + "eval_recall": 0.9124168514412417, + "eval_runtime": 8.0157, + "eval_samples_per_second": 450.115, + "eval_steps_per_second": 3.618, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 5.431685786224943e-05, + "loss": 0.6897, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.910268088752773, + "eval_loss": 0.8116483688354492, + "eval_precision": 0.9074876065725506, + "eval_recall": 0.9151884700665188, + "eval_runtime": 7.7488, + "eval_samples_per_second": 465.619, + "eval_steps_per_second": 3.743, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 4.2566887660302716e-05, + "loss": 0.6872, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 3.2139373160727306e-05, + "loss": 0.6873, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9143976714084776, + "eval_loss": 0.8085451722145081, + "eval_precision": 0.9132669065932635, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.9741, + "eval_samples_per_second": 452.463, + "eval_steps_per_second": 3.637, + "step": 552 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.3407293024345204, + "learning_rate": 0.0005392250490698531, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-53jz7fuv/checkpoint-552/training_args.bin b/run-53jz7fuv/checkpoint-552/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b992a5eda32899c02eaf7e9a77b0b104aca18b6f --- /dev/null +++ b/run-53jz7fuv/checkpoint-552/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53613fc03f800bc6efd8ebe87570489c58660c4cb11c00d863074ecb837c2ee5 +size 4792 diff --git a/run-53jz7fuv/checkpoint-630/model.safetensors b/run-53jz7fuv/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f4640ad2816e969a152febe7014f02bdb58e089 --- /dev/null +++ b/run-53jz7fuv/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bb12f398840b1283a34220f50637a718bf6018fbe6c4396d948c20146bc1fd0 +size 198025308 diff --git a/run-53jz7fuv/checkpoint-630/optimizer.pt b/run-53jz7fuv/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9eb99f1453a3ec71f5060f0f7e1f64c18c3754ed --- /dev/null +++ b/run-53jz7fuv/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8fb78e5d507f747a586f47e813f4b328e31e74838ba4eb1665df592b172a8d3 +size 395900602 diff --git a/run-53jz7fuv/checkpoint-630/rng_state.pth b/run-53jz7fuv/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-53jz7fuv/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-53jz7fuv/checkpoint-630/scheduler.pt b/run-53jz7fuv/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..26d68f40b9cfcc2f414d672a1ba181dd42796628 --- /dev/null +++ b/run-53jz7fuv/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db91ff9308f9445048ea4d7e662244d69c9532b8b2dc6eb288f7fce7ab98ca34 +size 1064 diff --git a/run-53jz7fuv/checkpoint-630/trainer_state.json b/run-53jz7fuv/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9b3e9ddf202c72d5a11f86766a7a2624c74a7c7b --- /dev/null +++ b/run-53jz7fuv/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9143976714084776, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-53jz7fuv/checkpoint-552", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 7.378869092534831e-05, + "loss": 1.2995, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.1503448486328125, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.3107, + "eval_samples_per_second": 434.142, + "eval_steps_per_second": 3.489, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00014757738185069662, + "loss": 0.9464, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00022136607277604495, + "loss": 0.8285, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8406319290465631, + "eval_f1": 0.852736263314779, + "eval_loss": 0.947006344795227, + "eval_precision": 0.8854687716818802, + "eval_recall": 0.8406319290465631, + "eval_runtime": 8.0818, + "eval_samples_per_second": 446.436, + "eval_steps_per_second": 3.588, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00029515476370139325, + "loss": 0.8, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.895307019390465, + "eval_loss": 0.8166604042053223, + "eval_precision": 0.895600373172129, + "eval_recall": 0.9068736141906873, + "eval_runtime": 8.0961, + "eval_samples_per_second": 445.647, + "eval_steps_per_second": 3.582, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003689434546267416, + "loss": 0.7924, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004427321455520899, + "loss": 0.7794, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6599223946784922, + "eval_f1": 0.7102920067865718, + "eval_loss": 1.1635165214538574, + "eval_precision": 0.8726549957647445, + "eval_recall": 0.6599223946784922, + "eval_runtime": 7.6332, + "eval_samples_per_second": 472.674, + "eval_steps_per_second": 3.799, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0005165208364774382, + "loss": 0.796, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0005388486173762167, + "loss": 0.7757, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8805432372505543, + "eval_f1": 0.8773334790511929, + "eval_loss": 0.879237949848175, + "eval_precision": 0.8864864928693271, + "eval_recall": 0.8805432372505543, + "eval_runtime": 7.9933, + "eval_samples_per_second": 451.379, + "eval_steps_per_second": 3.628, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0005369783571614465, + "loss": 0.778, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.875, + "eval_f1": 0.856802615183484, + "eval_loss": 0.8734626173973083, + "eval_precision": 0.8631760709471038, + "eval_recall": 0.875, + "eval_runtime": 8.2713, + "eval_samples_per_second": 436.207, + "eval_steps_per_second": 3.506, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0005335507912224025, + "loss": 0.7731, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0005285858838454891, + "loss": 0.7696, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.7804878048780488, + "eval_f1": 0.8094952278698527, + "eval_loss": 1.005456566810608, + "eval_precision": 0.8749780641719764, + "eval_recall": 0.7804878048780488, + "eval_runtime": 8.458, + "eval_samples_per_second": 426.578, + "eval_steps_per_second": 3.429, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0005221125537545482, + "loss": 0.7617, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.000514168505670137, + "loss": 0.7535, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.871119733924612, + "eval_f1": 0.8637350836189582, + "eval_loss": 0.8737017512321472, + "eval_precision": 0.8691820561567153, + "eval_recall": 0.871119733924612, + "eval_runtime": 8.2672, + "eval_samples_per_second": 436.425, + "eval_steps_per_second": 3.508, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0005048000106936684, + "loss": 0.7492, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8957871396895787, + "eval_f1": 0.8877932865479055, + "eval_loss": 0.8430231809616089, + "eval_precision": 0.8895626497267765, + "eval_recall": 0.8957871396895787, + "eval_runtime": 8.0069, + "eval_samples_per_second": 450.61, + "eval_steps_per_second": 3.622, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0004940616367955944, + "loss": 0.7515, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0004820159309774396, + "loss": 0.7474, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8905210643015521, + "eval_f1": 0.8876570329441318, + "eval_loss": 0.8475670218467712, + "eval_precision": 0.8977710495990978, + "eval_recall": 0.8905210643015521, + "eval_runtime": 7.9501, + "eval_samples_per_second": 453.829, + "eval_steps_per_second": 3.648, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.000468733054958969, + "loss": 0.7451, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9029933481152993, + "eval_f1": 0.9014412435203255, + "eval_loss": 0.8295616507530212, + "eval_precision": 0.9006666247452533, + "eval_recall": 0.9029933481152993, + "eval_runtime": 8.1207, + "eval_samples_per_second": 444.296, + "eval_steps_per_second": 3.571, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0004542903765124787, + "loss": 0.7454, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0004387720188245314, + "loss": 0.7353, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8921840354767184, + "eval_f1": 0.8924850696189655, + "eval_loss": 0.8424952030181885, + "eval_precision": 0.8978571365485917, + "eval_recall": 0.8921840354767184, + "eval_runtime": 7.8206, + "eval_samples_per_second": 461.348, + "eval_steps_per_second": 3.708, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00042226837050993773, + "loss": 0.7265, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0004048755591319665, + "loss": 0.7277, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8190133037694013, + "eval_f1": 0.8406730878971063, + "eval_loss": 0.9540680050849915, + "eval_precision": 0.8835019013446076, + "eval_recall": 0.8190133037694013, + "eval_runtime": 8.3485, + "eval_samples_per_second": 432.175, + "eval_steps_per_second": 3.474, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00038669489129533217, + "loss": 0.7298, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8988359201773836, + "eval_f1": 0.8936437037552686, + "eval_loss": 0.831924319267273, + "eval_precision": 0.8965423225960021, + "eval_recall": 0.8988359201773836, + "eval_runtime": 8.2585, + "eval_samples_per_second": 436.883, + "eval_steps_per_second": 3.512, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.000367832262573209, + "loss": 0.7231, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00034839754070522253, + "loss": 0.7201, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9044669532947774, + "eval_loss": 0.824361264705658, + "eval_precision": 0.9031627762580688, + "eval_recall": 0.9088137472283814, + "eval_runtime": 7.9484, + "eval_samples_per_second": 453.927, + "eval_steps_per_second": 3.649, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00032850392565906243, + "loss": 0.7173, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00030826729028311626, + "loss": 0.7214, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9016075388026608, + "eval_f1": 0.8900885379859285, + "eval_loss": 0.8331730961799622, + "eval_precision": 0.8899670184070819, + "eval_recall": 0.9016075388026608, + "eval_runtime": 7.2986, + "eval_samples_per_second": 494.342, + "eval_steps_per_second": 3.973, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00028780550539057414, + "loss": 0.7114, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9054871730935844, + "eval_loss": 0.8191584348678589, + "eval_precision": 0.9037609511867994, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.1753, + "eval_samples_per_second": 441.328, + "eval_steps_per_second": 3.547, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0002672377532061393, + "loss": 0.7104, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00024668383317425673, + "loss": 0.7059, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9077050997782705, + "eval_f1": 0.9035577355537484, + "eval_loss": 0.8239393830299377, + "eval_precision": 0.9018666514340472, + "eval_recall": 0.9077050997782705, + "eval_runtime": 7.8397, + "eval_samples_per_second": 460.223, + "eval_steps_per_second": 3.699, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.0002262634641722694, + "loss": 0.7032, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00020609558719284705, + "loss": 0.7044, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9036268750748488, + "eval_loss": 0.8207970857620239, + "eval_precision": 0.9059613649467493, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.7152, + "eval_samples_per_second": 467.647, + "eval_steps_per_second": 3.759, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.0001862976725573018, + "loss": 0.7015, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9021618625277162, + "eval_f1": 0.9009925002634688, + "eval_loss": 0.8348780870437622, + "eval_precision": 0.9005397621027742, + "eval_recall": 0.9021618625277162, + "eval_runtime": 7.6248, + "eval_samples_per_second": 473.196, + "eval_steps_per_second": 3.803, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00016698503569501257, + "loss": 0.7032, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.00014827016547428785, + "loss": 0.6938, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9103315892112055, + "eval_loss": 0.8124563694000244, + "eval_precision": 0.9085347933280395, + "eval_recall": 0.9129711751662971, + "eval_runtime": 7.881, + "eval_samples_per_second": 457.807, + "eval_steps_per_second": 3.68, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.00013026206899688556, + "loss": 0.6972, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9105816259077874, + "eval_loss": 0.8168894052505493, + "eval_precision": 0.9081968011566729, + "eval_recall": 0.9138026607538803, + "eval_runtime": 8.28, + "eval_samples_per_second": 435.748, + "eval_steps_per_second": 3.502, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00011306563667251915, + "loss": 0.6957, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 9.678103127154989e-05, + "loss": 0.6928, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9105142347847764, + "eval_loss": 0.8113499283790588, + "eval_precision": 0.9105825329516584, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.8537, + "eval_samples_per_second": 459.402, + "eval_steps_per_second": 3.693, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 8.15031045144063e-05, + "loss": 0.691, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 6.732084459587836e-05, + "loss": 0.6919, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9087349483916861, + "eval_loss": 0.8187626600265503, + "eval_precision": 0.9075196437317545, + "eval_recall": 0.9124168514412417, + "eval_runtime": 8.0157, + "eval_samples_per_second": 450.115, + "eval_steps_per_second": 3.618, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 5.431685786224943e-05, + "loss": 0.6897, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.910268088752773, + "eval_loss": 0.8116483688354492, + "eval_precision": 0.9074876065725506, + "eval_recall": 0.9151884700665188, + "eval_runtime": 7.7488, + "eval_samples_per_second": 465.619, + "eval_steps_per_second": 3.743, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 4.2566887660302716e-05, + "loss": 0.6872, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 3.2139373160727306e-05, + "loss": 0.6873, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9143976714084776, + "eval_loss": 0.8085451722145081, + "eval_precision": 0.9132669065932635, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.9741, + "eval_samples_per_second": 452.463, + "eval_steps_per_second": 3.637, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 2.3095050725610417e-05, + "loss": 0.6877, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 1.5486600141901595e-05, + "loss": 0.6867, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9086632451350851, + "eval_loss": 0.8173710107803345, + "eval_precision": 0.9055352953824869, + "eval_recall": 0.9121396895787139, + "eval_runtime": 8.1642, + "eval_samples_per_second": 441.931, + "eval_steps_per_second": 3.552, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 9.358337781406752e-06, + "loss": 0.6875, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9074680441485458, + "eval_loss": 0.8134535551071167, + "eval_precision": 0.9041798945709816, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.2721, + "eval_samples_per_second": 436.164, + "eval_steps_per_second": 3.506, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 4.7459584745389585e-06, + "loss": 0.6896, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.6763276013153163e-06, + "loss": 0.688, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9137013021999896, + "eval_loss": 0.810537576675415, + "eval_precision": 0.9117123491688834, + "eval_recall": 0.9176829268292683, + "eval_runtime": 8.0124, + "eval_samples_per_second": 450.304, + "eval_steps_per_second": 3.619, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 1.6732461059182662e-07, + "loss": 0.6848, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9107507652356773, + "eval_loss": 0.8156982064247131, + "eval_precision": 0.9088706361540235, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.7675, + "eval_samples_per_second": 464.498, + "eval_steps_per_second": 3.733, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.3407293024345204, + "learning_rate": 0.0005392250490698531, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-53jz7fuv/checkpoint-630/training_args.bin b/run-53jz7fuv/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b992a5eda32899c02eaf7e9a77b0b104aca18b6f --- /dev/null +++ b/run-53jz7fuv/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53613fc03f800bc6efd8ebe87570489c58660c4cb11c00d863074ecb837c2ee5 +size 4792 diff --git a/run-5gfo36to/checkpoint-595/model.safetensors b/run-5gfo36to/checkpoint-595/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf5dbc87e208bceba3780fcde7ac842ce64b15bf --- /dev/null +++ b/run-5gfo36to/checkpoint-595/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f6eb170eac45cb6d5a011f7dcac59cd43438199c179bb16802ce8a2f3884045 +size 198025308 diff --git a/run-5gfo36to/checkpoint-595/optimizer.pt b/run-5gfo36to/checkpoint-595/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..677f1cc9b18072010425f50ce821ec4b403e799f --- /dev/null +++ b/run-5gfo36to/checkpoint-595/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16aab8f07ebf4056ffe65c54421f9faa4a8c1590441cd447fb904ab1abbceb94 +size 395900602 diff --git a/run-5gfo36to/checkpoint-595/rng_state.pth b/run-5gfo36to/checkpoint-595/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b2798d3ef22ba33b35deea6a8c61abbb56099a6 --- /dev/null +++ b/run-5gfo36to/checkpoint-595/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d4b46d5b7fd917d05ccb48b8b2f6f0c7b9f5cfd5e53675d2f6391274fc4f7a5 +size 14244 diff --git a/run-5gfo36to/checkpoint-595/scheduler.pt b/run-5gfo36to/checkpoint-595/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b25b1c93e64b19045e5a5241b5519a46c53fd18f --- /dev/null +++ b/run-5gfo36to/checkpoint-595/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8592d3b3a1166405780a915ea7fdc87a7aa6e7674c2c9d4f523a46b5c98a6ebe +size 1064 diff --git a/run-5gfo36to/checkpoint-595/trainer_state.json b/run-5gfo36to/checkpoint-595/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d3600904d6a715a38d48e3696c161aab2c1ad856 --- /dev/null +++ b/run-5gfo36to/checkpoint-595/trainer_state.json @@ -0,0 +1,634 @@ +{ + "best_metric": 0.8758816439309342, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-5gfo36to/checkpoint-595", + "epoch": 28.0, + "eval_steps": 500, + "global_step": 595, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.962394700918074e-07, + "loss": 1.5547, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.3905210643015521, + "eval_f1": 0.4809261048868508, + "eval_loss": 1.5319658517837524, + "eval_precision": 0.6963017672243932, + "eval_recall": 0.3905210643015521, + "eval_runtime": 7.9549, + "eval_samples_per_second": 453.559, + "eval_steps_per_second": 3.646, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 7.924789401836148e-07, + "loss": 1.5425, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 1.188718410275422e-06, + "loss": 1.5134, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.5504434589800443, + "eval_f1": 0.611988234974916, + "eval_loss": 1.4773279428482056, + "eval_precision": 0.704575311080023, + "eval_recall": 0.5504434589800443, + "eval_runtime": 8.2113, + "eval_samples_per_second": 439.394, + "eval_steps_per_second": 3.532, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 1.5849578803672296e-06, + "loss": 1.4767, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.7330931263858093, + "eval_f1": 0.7213534975698069, + "eval_loss": 1.3926758766174316, + "eval_precision": 0.7115006206646903, + "eval_recall": 0.7330931263858093, + "eval_runtime": 8.3476, + "eval_samples_per_second": 432.222, + "eval_steps_per_second": 3.474, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 1.981197350459037e-06, + "loss": 1.4201, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 2.377436820550844e-06, + "loss": 1.3515, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8123614190687362, + "eval_f1": 0.7503480231384221, + "eval_loss": 1.2708595991134644, + "eval_precision": 0.7164835518781963, + "eval_recall": 0.8123614190687362, + "eval_runtime": 8.0306, + "eval_samples_per_second": 449.282, + "eval_steps_per_second": 3.611, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 2.7736762906426515e-06, + "loss": 1.2817, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 2.893574718988652e-06, + "loss": 1.194, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8270509977827051, + "eval_f1": 0.7500168180741826, + "eval_loss": 1.1383870840072632, + "eval_precision": 0.6861103059596184, + "eval_recall": 0.8270509977827051, + "eval_runtime": 7.9729, + "eval_samples_per_second": 452.531, + "eval_steps_per_second": 3.637, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 2.883531568647577e-06, + "loss": 1.1217, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.7513704434640557, + "eval_loss": 1.0325303077697754, + "eval_precision": 0.7699235349840203, + "eval_recall": 0.8287139689578714, + "eval_runtime": 8.0914, + "eval_samples_per_second": 445.907, + "eval_steps_per_second": 3.584, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 2.8651258089795334e-06, + "loss": 1.0554, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 2.838464646633214e-06, + "loss": 1.009, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9779973030090332, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.1199, + "eval_samples_per_second": 444.342, + "eval_steps_per_second": 3.571, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 2.8037033728824875e-06, + "loss": 0.9891, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 2.7610444591129577e-06, + "loss": 0.9542, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8298226164079823, + "eval_f1": 0.754104983609878, + "eval_loss": 0.9506354331970215, + "eval_precision": 0.756972187989509, + "eval_recall": 0.8298226164079823, + "eval_runtime": 7.6625, + "eval_samples_per_second": 470.866, + "eval_steps_per_second": 3.785, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 2.7107363775020604e-06, + "loss": 0.9379, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8389689578713969, + "eval_f1": 0.775357629944921, + "eval_loss": 0.9313869476318359, + "eval_precision": 0.7948947361531273, + "eval_recall": 0.8389689578713969, + "eval_runtime": 8.0899, + "eval_samples_per_second": 445.991, + "eval_steps_per_second": 3.585, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 2.653072153761796e-06, + "loss": 0.9394, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 2.5883876603738284e-06, + "loss": 0.9281, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8492239467849224, + "eval_f1": 0.795420343606662, + "eval_loss": 0.914733350276947, + "eval_precision": 0.8003855384949675, + "eval_recall": 0.8492239467849224, + "eval_runtime": 8.0341, + "eval_samples_per_second": 449.084, + "eval_steps_per_second": 3.61, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 2.5170596602582186e-06, + "loss": 0.9087, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8572616407982262, + "eval_f1": 0.808405624260162, + "eval_loss": 0.9006863236427307, + "eval_precision": 0.8469747892259065, + "eval_recall": 0.8572616407982262, + "eval_runtime": 7.8113, + "eval_samples_per_second": 461.895, + "eval_steps_per_second": 3.713, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 2.4395036122706834e-06, + "loss": 0.9153, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 2.356171251310543e-06, + "loss": 0.9029, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8650221729490022, + "eval_f1": 0.8210045749531748, + "eval_loss": 0.8931995034217834, + "eval_precision": 0.8580775297777307, + "eval_recall": 0.8650221729490022, + "eval_runtime": 8.1759, + "eval_samples_per_second": 441.298, + "eval_steps_per_second": 3.547, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 2.267547957134312e-06, + "loss": 0.8934, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 2.174149927200614e-06, + "loss": 0.8941, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8697339246119734, + "eval_f1": 0.8329071322867797, + "eval_loss": 0.8884183764457703, + "eval_precision": 0.8628635026938938, + "eval_recall": 0.8697339246119734, + "eval_runtime": 8.0072, + "eval_samples_per_second": 450.593, + "eval_steps_per_second": 3.622, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 2.0765211700135363e-06, + "loss": 0.8834, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8677937915742794, + "eval_f1": 0.8316428946508154, + "eval_loss": 0.8850321769714355, + "eval_precision": 0.8662689519570642, + "eval_recall": 0.8677937915742794, + "eval_runtime": 7.7568, + "eval_samples_per_second": 465.141, + "eval_steps_per_second": 3.739, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 1.9752303364770783e-06, + "loss": 0.8888, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 1.8708674077168503e-06, + "loss": 0.883, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8783259423503326, + "eval_f1": 0.8473787169767468, + "eval_loss": 0.8773753643035889, + "eval_precision": 0.8699827839076667, + "eval_recall": 0.8783259423503326, + "eval_runtime": 8.0218, + "eval_samples_per_second": 449.773, + "eval_steps_per_second": 3.615, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 1.7640402586612353e-06, + "loss": 0.8719, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 1.6553711173978626e-06, + "loss": 0.876, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8822062084257206, + "eval_f1": 0.8530468097025377, + "eval_loss": 0.8723440766334534, + "eval_precision": 0.8763556117724046, + "eval_recall": 0.8822062084257206, + "eval_runtime": 7.5736, + "eval_samples_per_second": 476.39, + "eval_steps_per_second": 3.829, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 1.5454929409283e-06, + "loss": 0.8725, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8813747228381374, + "eval_f1": 0.8548079113259014, + "eval_loss": 0.871452271938324, + "eval_precision": 0.8770164821564684, + "eval_recall": 0.8813747228381374, + "eval_runtime": 8.0293, + "eval_samples_per_second": 449.355, + "eval_steps_per_second": 3.612, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 1.4350457284308572e-06, + "loss": 0.872, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 1.3246727935053404e-06, + "loss": 0.8669, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.8822062084257206, + "eval_f1": 0.8581510439436856, + "eval_loss": 0.8683322668075562, + "eval_precision": 0.8791572304460623, + "eval_recall": 0.8822062084257206, + "eval_runtime": 7.8545, + "eval_samples_per_second": 459.356, + "eval_steps_per_second": 3.692, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 1.215017017112551e-06, + "loss": 0.864, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 1.1067171030337414e-06, + "loss": 0.8641, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.8827605321507761, + "eval_f1": 0.858961844982903, + "eval_loss": 0.8635474443435669, + "eval_precision": 0.8777211923157894, + "eval_recall": 0.8827605321507761, + "eval_runtime": 7.7614, + "eval_samples_per_second": 464.865, + "eval_steps_per_second": 3.736, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 1.0004038576605748e-06, + "loss": 0.8622, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8833148558758315, + "eval_f1": 0.8610389672722546, + "eval_loss": 0.8626967668533325, + "eval_precision": 0.8716167942336414, + "eval_recall": 0.8833148558758315, + "eval_runtime": 7.9018, + "eval_samples_per_second": 456.603, + "eval_steps_per_second": 3.67, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 8.966965157844205e-07, + "loss": 0.8642, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 7.961991337858808e-07, + "loss": 0.8614, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.885809312638581, + "eval_f1": 0.864803915777143, + "eval_loss": 0.8595779538154602, + "eval_precision": 0.8756200288833869, + "eval_recall": 0.885809312638581, + "eval_runtime": 7.9527, + "eval_samples_per_second": 453.684, + "eval_steps_per_second": 3.647, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 6.994970712328673e-07, + "loss": 0.8647, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.8871951219512195, + "eval_f1": 0.8658740529149358, + "eval_loss": 0.8626992106437683, + "eval_precision": 0.8795825768451412, + "eval_recall": 0.8871951219512195, + "eval_runtime": 7.5522, + "eval_samples_per_second": 477.74, + "eval_steps_per_second": 3.84, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 6.071535813806055e-07, + "loss": 0.8576, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 5.197065304326179e-07, + "loss": 0.8586, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.886640798226164, + "eval_f1": 0.8654711348871644, + "eval_loss": 0.8578057885169983, + "eval_precision": 0.8776669460701156, + "eval_recall": 0.886640798226164, + "eval_runtime": 7.2538, + "eval_samples_per_second": 497.398, + "eval_steps_per_second": 3.998, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 4.376652646717638e-07, + "loss": 0.8577, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 3.615076437091276e-07, + "loss": 0.8567, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.8852549889135255, + "eval_f1": 0.8660954780902751, + "eval_loss": 0.856066107749939, + "eval_precision": 0.8760845197403444, + "eval_recall": 0.8852549889135255, + "eval_runtime": 7.8452, + "eval_samples_per_second": 459.898, + "eval_steps_per_second": 3.697, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 2.916772571309598e-07, + "loss": 0.8536, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.8874722838137472, + "eval_f1": 0.8668388458748048, + "eval_loss": 0.8568962216377258, + "eval_precision": 0.8813653547492497, + "eval_recall": 0.8874722838137472, + "eval_runtime": 7.7878, + "eval_samples_per_second": 463.289, + "eval_steps_per_second": 3.724, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 2.285808407556625e-07, + "loss": 0.8551, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.7258590755015932e-07, + "loss": 0.8566, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.8927383592017738, + "eval_f1": 0.8747366192276731, + "eval_loss": 0.85516756772995, + "eval_precision": 0.8840382023110629, + "eval_recall": 0.8927383592017738, + "eval_runtime": 7.8267, + "eval_samples_per_second": 460.984, + "eval_steps_per_second": 3.705, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.2401860700466257e-07, + "loss": 0.8637, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 8.316182543418433e-08, + "loss": 0.8519, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.8896895787139689, + "eval_f1": 0.871177896326938, + "eval_loss": 0.8572620749473572, + "eval_precision": 0.8814473865092235, + "eval_recall": 0.8896895787139689, + "eval_runtime": 8.0055, + "eval_samples_per_second": 450.69, + "eval_steps_per_second": 3.623, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 5.025353827182356e-08, + "loss": 0.8491, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.8938470066518847, + "eval_f1": 0.8758816439309342, + "eval_loss": 0.8542837500572205, + "eval_precision": 0.8834578174777669, + "eval_recall": 0.8938470066518847, + "eval_runtime": 7.9669, + "eval_samples_per_second": 452.875, + "eval_steps_per_second": 3.64, + "step": 595 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4090181636067132, + "learning_rate": 2.895596127593977e-06, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-5gfo36to/checkpoint-595/training_args.bin b/run-5gfo36to/checkpoint-595/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2a3f2087bfe72e6ddf3d53461a9f112198bcf38f --- /dev/null +++ b/run-5gfo36to/checkpoint-595/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26eeada6b1f4efaf658e711f25397094905fae49937b2de3b795d0b383556099 +size 4792 diff --git a/run-5gfo36to/checkpoint-630/model.safetensors b/run-5gfo36to/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b63be0d35866fb9484d984c8b6e4650a06fa38b5 --- /dev/null +++ b/run-5gfo36to/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba98937b969c38204e5c3108e43cd4d8b2151a0a9c4434ef57bbe14e090b33a6 +size 198025308 diff --git a/run-5gfo36to/checkpoint-630/optimizer.pt b/run-5gfo36to/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4659e7af3c60c81a0fcdce7fc3d4eaedefde034 --- /dev/null +++ b/run-5gfo36to/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d79737e91aca949f3199e78a2ae01b5eff44ceee188b28448beb620f7a45501c +size 395900602 diff --git a/run-5gfo36to/checkpoint-630/rng_state.pth b/run-5gfo36to/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-5gfo36to/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-5gfo36to/checkpoint-630/scheduler.pt b/run-5gfo36to/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8d18918421b433fb80cca22eb9a82bbc7e5cc51 --- /dev/null +++ b/run-5gfo36to/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb42ae857f0ebacdac2967ce39c46d5bf92f957c342bebd81d5e52802e47ba06 +size 1064 diff --git a/run-5gfo36to/checkpoint-630/trainer_state.json b/run-5gfo36to/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d77791348942d8eb1f2c0a5a8604d66f59faa5a4 --- /dev/null +++ b/run-5gfo36to/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.8758816439309342, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-5gfo36to/checkpoint-595", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.962394700918074e-07, + "loss": 1.5547, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.3905210643015521, + "eval_f1": 0.4809261048868508, + "eval_loss": 1.5319658517837524, + "eval_precision": 0.6963017672243932, + "eval_recall": 0.3905210643015521, + "eval_runtime": 7.9549, + "eval_samples_per_second": 453.559, + "eval_steps_per_second": 3.646, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 7.924789401836148e-07, + "loss": 1.5425, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 1.188718410275422e-06, + "loss": 1.5134, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.5504434589800443, + "eval_f1": 0.611988234974916, + "eval_loss": 1.4773279428482056, + "eval_precision": 0.704575311080023, + "eval_recall": 0.5504434589800443, + "eval_runtime": 8.2113, + "eval_samples_per_second": 439.394, + "eval_steps_per_second": 3.532, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 1.5849578803672296e-06, + "loss": 1.4767, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.7330931263858093, + "eval_f1": 0.7213534975698069, + "eval_loss": 1.3926758766174316, + "eval_precision": 0.7115006206646903, + "eval_recall": 0.7330931263858093, + "eval_runtime": 8.3476, + "eval_samples_per_second": 432.222, + "eval_steps_per_second": 3.474, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 1.981197350459037e-06, + "loss": 1.4201, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 2.377436820550844e-06, + "loss": 1.3515, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8123614190687362, + "eval_f1": 0.7503480231384221, + "eval_loss": 1.2708595991134644, + "eval_precision": 0.7164835518781963, + "eval_recall": 0.8123614190687362, + "eval_runtime": 8.0306, + "eval_samples_per_second": 449.282, + "eval_steps_per_second": 3.611, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 2.7736762906426515e-06, + "loss": 1.2817, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 2.893574718988652e-06, + "loss": 1.194, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8270509977827051, + "eval_f1": 0.7500168180741826, + "eval_loss": 1.1383870840072632, + "eval_precision": 0.6861103059596184, + "eval_recall": 0.8270509977827051, + "eval_runtime": 7.9729, + "eval_samples_per_second": 452.531, + "eval_steps_per_second": 3.637, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 2.883531568647577e-06, + "loss": 1.1217, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.7513704434640557, + "eval_loss": 1.0325303077697754, + "eval_precision": 0.7699235349840203, + "eval_recall": 0.8287139689578714, + "eval_runtime": 8.0914, + "eval_samples_per_second": 445.907, + "eval_steps_per_second": 3.584, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 2.8651258089795334e-06, + "loss": 1.0554, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 2.838464646633214e-06, + "loss": 1.009, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9779973030090332, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.1199, + "eval_samples_per_second": 444.342, + "eval_steps_per_second": 3.571, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 2.8037033728824875e-06, + "loss": 0.9891, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 2.7610444591129577e-06, + "loss": 0.9542, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8298226164079823, + "eval_f1": 0.754104983609878, + "eval_loss": 0.9506354331970215, + "eval_precision": 0.756972187989509, + "eval_recall": 0.8298226164079823, + "eval_runtime": 7.6625, + "eval_samples_per_second": 470.866, + "eval_steps_per_second": 3.785, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 2.7107363775020604e-06, + "loss": 0.9379, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8389689578713969, + "eval_f1": 0.775357629944921, + "eval_loss": 0.9313869476318359, + "eval_precision": 0.7948947361531273, + "eval_recall": 0.8389689578713969, + "eval_runtime": 8.0899, + "eval_samples_per_second": 445.991, + "eval_steps_per_second": 3.585, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 2.653072153761796e-06, + "loss": 0.9394, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 2.5883876603738284e-06, + "loss": 0.9281, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8492239467849224, + "eval_f1": 0.795420343606662, + "eval_loss": 0.914733350276947, + "eval_precision": 0.8003855384949675, + "eval_recall": 0.8492239467849224, + "eval_runtime": 8.0341, + "eval_samples_per_second": 449.084, + "eval_steps_per_second": 3.61, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 2.5170596602582186e-06, + "loss": 0.9087, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8572616407982262, + "eval_f1": 0.808405624260162, + "eval_loss": 0.9006863236427307, + "eval_precision": 0.8469747892259065, + "eval_recall": 0.8572616407982262, + "eval_runtime": 7.8113, + "eval_samples_per_second": 461.895, + "eval_steps_per_second": 3.713, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 2.4395036122706834e-06, + "loss": 0.9153, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 2.356171251310543e-06, + "loss": 0.9029, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8650221729490022, + "eval_f1": 0.8210045749531748, + "eval_loss": 0.8931995034217834, + "eval_precision": 0.8580775297777307, + "eval_recall": 0.8650221729490022, + "eval_runtime": 8.1759, + "eval_samples_per_second": 441.298, + "eval_steps_per_second": 3.547, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 2.267547957134312e-06, + "loss": 0.8934, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 2.174149927200614e-06, + "loss": 0.8941, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8697339246119734, + "eval_f1": 0.8329071322867797, + "eval_loss": 0.8884183764457703, + "eval_precision": 0.8628635026938938, + "eval_recall": 0.8697339246119734, + "eval_runtime": 8.0072, + "eval_samples_per_second": 450.593, + "eval_steps_per_second": 3.622, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 2.0765211700135363e-06, + "loss": 0.8834, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8677937915742794, + "eval_f1": 0.8316428946508154, + "eval_loss": 0.8850321769714355, + "eval_precision": 0.8662689519570642, + "eval_recall": 0.8677937915742794, + "eval_runtime": 7.7568, + "eval_samples_per_second": 465.141, + "eval_steps_per_second": 3.739, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 1.9752303364770783e-06, + "loss": 0.8888, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 1.8708674077168503e-06, + "loss": 0.883, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8783259423503326, + "eval_f1": 0.8473787169767468, + "eval_loss": 0.8773753643035889, + "eval_precision": 0.8699827839076667, + "eval_recall": 0.8783259423503326, + "eval_runtime": 8.0218, + "eval_samples_per_second": 449.773, + "eval_steps_per_second": 3.615, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 1.7640402586612353e-06, + "loss": 0.8719, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 1.6553711173978626e-06, + "loss": 0.876, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8822062084257206, + "eval_f1": 0.8530468097025377, + "eval_loss": 0.8723440766334534, + "eval_precision": 0.8763556117724046, + "eval_recall": 0.8822062084257206, + "eval_runtime": 7.5736, + "eval_samples_per_second": 476.39, + "eval_steps_per_second": 3.829, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 1.5454929409283e-06, + "loss": 0.8725, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8813747228381374, + "eval_f1": 0.8548079113259014, + "eval_loss": 0.871452271938324, + "eval_precision": 0.8770164821564684, + "eval_recall": 0.8813747228381374, + "eval_runtime": 8.0293, + "eval_samples_per_second": 449.355, + "eval_steps_per_second": 3.612, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 1.4350457284308572e-06, + "loss": 0.872, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 1.3246727935053404e-06, + "loss": 0.8669, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.8822062084257206, + "eval_f1": 0.8581510439436856, + "eval_loss": 0.8683322668075562, + "eval_precision": 0.8791572304460623, + "eval_recall": 0.8822062084257206, + "eval_runtime": 7.8545, + "eval_samples_per_second": 459.356, + "eval_steps_per_second": 3.692, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 1.215017017112551e-06, + "loss": 0.864, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 1.1067171030337414e-06, + "loss": 0.8641, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.8827605321507761, + "eval_f1": 0.858961844982903, + "eval_loss": 0.8635474443435669, + "eval_precision": 0.8777211923157894, + "eval_recall": 0.8827605321507761, + "eval_runtime": 7.7614, + "eval_samples_per_second": 464.865, + "eval_steps_per_second": 3.736, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 1.0004038576605748e-06, + "loss": 0.8622, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8833148558758315, + "eval_f1": 0.8610389672722546, + "eval_loss": 0.8626967668533325, + "eval_precision": 0.8716167942336414, + "eval_recall": 0.8833148558758315, + "eval_runtime": 7.9018, + "eval_samples_per_second": 456.603, + "eval_steps_per_second": 3.67, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 8.966965157844205e-07, + "loss": 0.8642, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 7.961991337858808e-07, + "loss": 0.8614, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.885809312638581, + "eval_f1": 0.864803915777143, + "eval_loss": 0.8595779538154602, + "eval_precision": 0.8756200288833869, + "eval_recall": 0.885809312638581, + "eval_runtime": 7.9527, + "eval_samples_per_second": 453.684, + "eval_steps_per_second": 3.647, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 6.994970712328673e-07, + "loss": 0.8647, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.8871951219512195, + "eval_f1": 0.8658740529149358, + "eval_loss": 0.8626992106437683, + "eval_precision": 0.8795825768451412, + "eval_recall": 0.8871951219512195, + "eval_runtime": 7.5522, + "eval_samples_per_second": 477.74, + "eval_steps_per_second": 3.84, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 6.071535813806055e-07, + "loss": 0.8576, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 5.197065304326179e-07, + "loss": 0.8586, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.886640798226164, + "eval_f1": 0.8654711348871644, + "eval_loss": 0.8578057885169983, + "eval_precision": 0.8776669460701156, + "eval_recall": 0.886640798226164, + "eval_runtime": 7.2538, + "eval_samples_per_second": 497.398, + "eval_steps_per_second": 3.998, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 4.376652646717638e-07, + "loss": 0.8577, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 3.615076437091276e-07, + "loss": 0.8567, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.8852549889135255, + "eval_f1": 0.8660954780902751, + "eval_loss": 0.856066107749939, + "eval_precision": 0.8760845197403444, + "eval_recall": 0.8852549889135255, + "eval_runtime": 7.8452, + "eval_samples_per_second": 459.898, + "eval_steps_per_second": 3.697, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 2.916772571309598e-07, + "loss": 0.8536, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.8874722838137472, + "eval_f1": 0.8668388458748048, + "eval_loss": 0.8568962216377258, + "eval_precision": 0.8813653547492497, + "eval_recall": 0.8874722838137472, + "eval_runtime": 7.7878, + "eval_samples_per_second": 463.289, + "eval_steps_per_second": 3.724, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 2.285808407556625e-07, + "loss": 0.8551, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.7258590755015932e-07, + "loss": 0.8566, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.8927383592017738, + "eval_f1": 0.8747366192276731, + "eval_loss": 0.85516756772995, + "eval_precision": 0.8840382023110629, + "eval_recall": 0.8927383592017738, + "eval_runtime": 7.8267, + "eval_samples_per_second": 460.984, + "eval_steps_per_second": 3.705, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.2401860700466257e-07, + "loss": 0.8637, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 8.316182543418433e-08, + "loss": 0.8519, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.8896895787139689, + "eval_f1": 0.871177896326938, + "eval_loss": 0.8572620749473572, + "eval_precision": 0.8814473865092235, + "eval_recall": 0.8896895787139689, + "eval_runtime": 8.0055, + "eval_samples_per_second": 450.69, + "eval_steps_per_second": 3.623, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 5.025353827182356e-08, + "loss": 0.8491, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.8938470066518847, + "eval_f1": 0.8758816439309342, + "eval_loss": 0.8542837500572205, + "eval_precision": 0.8834578174777669, + "eval_recall": 0.8938470066518847, + "eval_runtime": 7.9669, + "eval_samples_per_second": 452.875, + "eval_steps_per_second": 3.64, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 2.5485423951098e-08, + "loss": 0.8635, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 9.00174745093997e-09, + "loss": 0.8537, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.8910753880266076, + "eval_f1": 0.871703951430068, + "eval_loss": 0.8537372946739197, + "eval_precision": 0.8824910875523956, + "eval_recall": 0.8910753880266076, + "eval_runtime": 7.7918, + "eval_samples_per_second": 463.049, + "eval_steps_per_second": 3.722, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 8.985200062879478e-10, + "loss": 0.8568, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.8919068736141907, + "eval_f1": 0.8721948757043176, + "eval_loss": 0.8553429841995239, + "eval_precision": 0.8868766048196579, + "eval_recall": 0.8919068736141907, + "eval_runtime": 7.4664, + "eval_samples_per_second": 483.231, + "eval_steps_per_second": 3.884, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4090181636067132, + "learning_rate": 2.895596127593977e-06, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-5gfo36to/checkpoint-630/training_args.bin b/run-5gfo36to/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2a3f2087bfe72e6ddf3d53461a9f112198bcf38f --- /dev/null +++ b/run-5gfo36to/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26eeada6b1f4efaf658e711f25397094905fae49937b2de3b795d0b383556099 +size 4792 diff --git a/run-622twmx8/checkpoint-531/model.safetensors b/run-622twmx8/checkpoint-531/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f04e96002f3e51836c78783ea705d7a873abe94f --- /dev/null +++ b/run-622twmx8/checkpoint-531/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edd8e16108bc65239fb5861e120240b205952a728d637555306d349552fdeeae +size 198025308 diff --git a/run-622twmx8/checkpoint-531/optimizer.pt b/run-622twmx8/checkpoint-531/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4dfa98123d238c70cf8da8308e66888dd7817fcc --- /dev/null +++ b/run-622twmx8/checkpoint-531/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:452e756b55dc90960499d2aed4a80622144046d2ff23048954b30d63f91d40aa +size 395900602 diff --git a/run-622twmx8/checkpoint-531/rng_state.pth b/run-622twmx8/checkpoint-531/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9f50e97f6b8f8d895fe76d5cec6c2f434c3883a4 --- /dev/null +++ b/run-622twmx8/checkpoint-531/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7ebc248f8c573102043dc64b4a3df9b7043de65474577f599703b3d52b79074 +size 14244 diff --git a/run-622twmx8/checkpoint-531/scheduler.pt b/run-622twmx8/checkpoint-531/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..23d42b6c7e6fb50758ab070fee2ed6808f5be0a4 --- /dev/null +++ b/run-622twmx8/checkpoint-531/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f095fa5c696ea068c7463f4b4e0a8b730819f50a20da55de3efcb01e961f5286 +size 1064 diff --git a/run-622twmx8/checkpoint-531/trainer_state.json b/run-622twmx8/checkpoint-531/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0416600312b3b0e5fa87a19fa5364ac2fa3de7e0 --- /dev/null +++ b/run-622twmx8/checkpoint-531/trainer_state.json @@ -0,0 +1,568 @@ +{ + "best_metric": 0.9191547933551557, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-622twmx8/checkpoint-531", + "epoch": 24.988235294117647, + "eval_steps": 500, + "global_step": 531, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.655445808960857e-06, + "loss": 1.518, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.7699556541019955, + "eval_f1": 0.730193831934965, + "eval_loss": 1.3581897020339966, + "eval_precision": 0.7011797709568601, + "eval_recall": 0.7699556541019955, + "eval_runtime": 7.792, + "eval_samples_per_second": 463.04, + "eval_steps_per_second": 3.722, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 9.310891617921714e-06, + "loss": 1.3894, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 1.396633742688257e-05, + "loss": 1.1386, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9885793328285217, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.0476, + "eval_samples_per_second": 448.331, + "eval_steps_per_second": 3.604, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 1.8621783235843428e-05, + "loss": 0.9729, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8672394678492239, + "eval_f1": 0.8259575167717805, + "eval_loss": 0.9051811695098877, + "eval_precision": 0.8604461389749134, + "eval_recall": 0.8672394678492239, + "eval_runtime": 8.3699, + "eval_samples_per_second": 431.069, + "eval_steps_per_second": 3.465, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 2.3277229044804283e-05, + "loss": 0.9139, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 2.793267485376514e-05, + "loss": 0.8689, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.8885934705997331, + "eval_loss": 0.8600777983665466, + "eval_precision": 0.8930838492785054, + "eval_recall": 0.8991130820399114, + "eval_runtime": 8.0749, + "eval_samples_per_second": 446.818, + "eval_steps_per_second": 3.591, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 3.258812066272599e-05, + "loss": 0.85, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 3.399681585307402e-05, + "loss": 0.8135, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9013303769401331, + "eval_f1": 0.8959676976776392, + "eval_loss": 0.8279750347137451, + "eval_precision": 0.8939639289290128, + "eval_recall": 0.9013303769401331, + "eval_runtime": 7.6287, + "eval_samples_per_second": 472.951, + "eval_steps_per_second": 3.801, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 3.387881816305771e-05, + "loss": 0.805, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9125454105000532, + "eval_loss": 0.8099972009658813, + "eval_precision": 0.9088097502654775, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.8442, + "eval_samples_per_second": 459.957, + "eval_steps_per_second": 3.697, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 3.366256757931985e-05, + "loss": 0.796, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 3.3349323680425984e-05, + "loss": 0.792, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9074279379157428, + "eval_f1": 0.9004171098091903, + "eval_loss": 0.8030616044998169, + "eval_precision": 0.8960958785882981, + "eval_recall": 0.9074279379157428, + "eval_runtime": 7.9912, + "eval_samples_per_second": 451.498, + "eval_steps_per_second": 3.629, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 3.2940910994634065e-05, + "loss": 0.7885, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 3.243970837270151e-05, + "loss": 0.7777, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9056248715383994, + "eval_loss": 0.8066438436508179, + "eval_precision": 0.9017662157183216, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.1376, + "eval_samples_per_second": 443.373, + "eval_steps_per_second": 3.564, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 3.184863513197149e-05, + "loss": 0.7712, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.910503075140062, + "eval_loss": 0.7965097427368164, + "eval_precision": 0.9071623382442995, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.7164, + "eval_samples_per_second": 467.576, + "eval_steps_per_second": 3.758, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 3.117113405244401e-05, + "loss": 0.7766, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 3.0411151323873334e-05, + "loss": 0.7701, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.912323379125128, + "eval_loss": 0.7965329885482788, + "eval_precision": 0.9086968327576164, + "eval_recall": 0.9190687361419069, + "eval_runtime": 8.1521, + "eval_samples_per_second": 442.584, + "eval_steps_per_second": 3.557, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 2.9573113560692303e-05, + "loss": 0.7586, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9133083297926875, + "eval_loss": 0.7989745140075684, + "eval_precision": 0.9108040149566228, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.9122, + "eval_samples_per_second": 456.005, + "eval_steps_per_second": 3.665, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 2.8661902018643042e-05, + "loss": 0.7739, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 2.768282416329256e-05, + "loss": 0.7623, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9122268186163894, + "eval_loss": 0.7990837693214417, + "eval_precision": 0.9092123192603685, + "eval_recall": 0.9157427937915743, + "eval_runtime": 7.9197, + "eval_samples_per_second": 455.571, + "eval_steps_per_second": 3.662, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 2.664158275603587e-05, + "loss": 0.752, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 2.5544242637649144e-05, + "loss": 0.7542, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9102584143137299, + "eval_loss": 0.8025537133216858, + "eval_precision": 0.9072283465411942, + "eval_recall": 0.9138026607538803, + "eval_runtime": 8.1073, + "eval_samples_per_second": 445.032, + "eval_steps_per_second": 3.577, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 2.4397195402866275e-05, + "loss": 0.7499, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9099722476500224, + "eval_loss": 0.8010039925575256, + "eval_precision": 0.9073922355185179, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.1149, + "eval_samples_per_second": 444.612, + "eval_steps_per_second": 3.574, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 2.3207122171736127e-05, + "loss": 0.7511, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 2.1980954674603364e-05, + "loss": 0.7512, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.912092192514154, + "eval_loss": 0.7963768243789673, + "eval_precision": 0.9093645944675653, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.9919, + "eval_samples_per_second": 451.457, + "eval_steps_per_second": 3.629, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 2.0725834877378293e-05, + "loss": 0.7465, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 1.944907338226341e-05, + "loss": 0.7504, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9155331166207425, + "eval_loss": 0.7937688231468201, + "eval_precision": 0.9122034279467051, + "eval_recall": 0.9190687361419069, + "eval_runtime": 8.1294, + "eval_samples_per_second": 443.822, + "eval_steps_per_second": 3.567, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 1.8158106846236683e-05, + "loss": 0.7418, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9147375032517198, + "eval_loss": 0.7910325527191162, + "eval_precision": 0.9108467801837773, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.8647, + "eval_samples_per_second": 458.756, + "eval_steps_per_second": 3.687, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 1.686045466531312e-05, + "loss": 0.746, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 1.556367517688242e-05, + "loss": 0.7396, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9118930192007607, + "eval_loss": 0.7957426905632019, + "eval_precision": 0.9080113461993669, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.8851, + "eval_samples_per_second": 457.569, + "eval_steps_per_second": 3.678, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 1.4275321635227725e-05, + "loss": 0.7378, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 1.3002898216651585e-05, + "loss": 0.7369, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9139042938361129, + "eval_loss": 0.7959333658218384, + "eval_precision": 0.9108347139698648, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.1051, + "eval_samples_per_second": 445.149, + "eval_steps_per_second": 3.578, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 1.1753816310462735e-05, + "loss": 0.7355, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9103949936701875, + "eval_loss": 0.8054380416870117, + "eval_precision": 0.9084407745369095, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.0357, + "eval_samples_per_second": 448.997, + "eval_steps_per_second": 3.609, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 1.0535351350412314e-05, + "loss": 0.7347, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 9.354600438020275e-06, + "loss": 0.7362, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9163882489977903, + "eval_loss": 0.7922390103340149, + "eval_precision": 0.9136094059657969, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.8219, + "eval_samples_per_second": 461.268, + "eval_steps_per_second": 3.708, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 8.21844100462009e-06, + "loss": 0.7351, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9105055972339051, + "eval_loss": 0.8012438416481018, + "eval_precision": 0.9064071363486025, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.1515, + "eval_samples_per_second": 442.619, + "eval_steps_per_second": 3.558, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 7.133490752900026e-06, + "loss": 0.7337, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 6.1060691112663175e-06, + "loss": 0.7336, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9133015657971577, + "eval_loss": 0.7941232919692993, + "eval_precision": 0.9158980010099637, + "eval_recall": 0.9176829268292683, + "eval_runtime": 8.0563, + "eval_samples_per_second": 447.846, + "eval_steps_per_second": 3.6, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 5.142160425542207e-06, + "loss": 0.7309, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 4.247379102397426e-06, + "loss": 0.7332, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.915658478968544, + "eval_loss": 0.7949287295341492, + "eval_precision": 0.9117468789379483, + "eval_recall": 0.9207317073170732, + "eval_runtime": 7.9184, + "eval_samples_per_second": 455.647, + "eval_steps_per_second": 3.662, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 3.4269369075344934e-06, + "loss": 0.7318, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9240576496674058, + "eval_f1": 0.9191547933551557, + "eval_loss": 0.7948815226554871, + "eval_precision": 0.9153474010787129, + "eval_recall": 0.9240576496674058, + "eval_runtime": 8.189, + "eval_samples_per_second": 440.59, + "eval_steps_per_second": 3.541, + "step": 531 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.2229288400535888, + "learning_rate": 3.4020565527021645e-05, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-622twmx8/checkpoint-531/training_args.bin b/run-622twmx8/checkpoint-531/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..22e4b23d63f6cf0330f33ec3e135d3f8fae4d1f1 --- /dev/null +++ b/run-622twmx8/checkpoint-531/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf5513b20a2b05fb35b2fa7de2262a999a106881d8dc1ce67434efb8bf85f630 +size 4792 diff --git a/run-622twmx8/checkpoint-630/model.safetensors b/run-622twmx8/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da711f40e1b6b4ded8d0e2208a80b66c41882598 --- /dev/null +++ b/run-622twmx8/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96b6f58d19571c59221ba735dead1ac814a44e9028bb00af8708d0b4139730fe +size 198025308 diff --git a/run-622twmx8/checkpoint-630/optimizer.pt b/run-622twmx8/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0df0a1f67a95c178678058f8f4409f51c290d27 --- /dev/null +++ b/run-622twmx8/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65def9e663e5e891cd5cc510392aa83ff78b4508cc9788cdc4af2e5c0e8902cd +size 395900602 diff --git a/run-622twmx8/checkpoint-630/rng_state.pth b/run-622twmx8/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-622twmx8/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-622twmx8/checkpoint-630/scheduler.pt b/run-622twmx8/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d45928140f11b0b50f4312a0287c6b524ebe530 --- /dev/null +++ b/run-622twmx8/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a89b4605715690b8243cfb783c3738025ab210d6a352e08faf49f62d66172cdc +size 1064 diff --git a/run-622twmx8/checkpoint-630/trainer_state.json b/run-622twmx8/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fd6036cb448751d1512f1b893f03e78c6f6d6ae1 --- /dev/null +++ b/run-622twmx8/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9191547933551557, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-622twmx8/checkpoint-531", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.655445808960857e-06, + "loss": 1.518, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.7699556541019955, + "eval_f1": 0.730193831934965, + "eval_loss": 1.3581897020339966, + "eval_precision": 0.7011797709568601, + "eval_recall": 0.7699556541019955, + "eval_runtime": 7.792, + "eval_samples_per_second": 463.04, + "eval_steps_per_second": 3.722, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 9.310891617921714e-06, + "loss": 1.3894, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 1.396633742688257e-05, + "loss": 1.1386, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9885793328285217, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.0476, + "eval_samples_per_second": 448.331, + "eval_steps_per_second": 3.604, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 1.8621783235843428e-05, + "loss": 0.9729, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8672394678492239, + "eval_f1": 0.8259575167717805, + "eval_loss": 0.9051811695098877, + "eval_precision": 0.8604461389749134, + "eval_recall": 0.8672394678492239, + "eval_runtime": 8.3699, + "eval_samples_per_second": 431.069, + "eval_steps_per_second": 3.465, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 2.3277229044804283e-05, + "loss": 0.9139, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 2.793267485376514e-05, + "loss": 0.8689, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.8885934705997331, + "eval_loss": 0.8600777983665466, + "eval_precision": 0.8930838492785054, + "eval_recall": 0.8991130820399114, + "eval_runtime": 8.0749, + "eval_samples_per_second": 446.818, + "eval_steps_per_second": 3.591, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 3.258812066272599e-05, + "loss": 0.85, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 3.399681585307402e-05, + "loss": 0.8135, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9013303769401331, + "eval_f1": 0.8959676976776392, + "eval_loss": 0.8279750347137451, + "eval_precision": 0.8939639289290128, + "eval_recall": 0.9013303769401331, + "eval_runtime": 7.6287, + "eval_samples_per_second": 472.951, + "eval_steps_per_second": 3.801, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 3.387881816305771e-05, + "loss": 0.805, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9125454105000532, + "eval_loss": 0.8099972009658813, + "eval_precision": 0.9088097502654775, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.8442, + "eval_samples_per_second": 459.957, + "eval_steps_per_second": 3.697, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 3.366256757931985e-05, + "loss": 0.796, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 3.3349323680425984e-05, + "loss": 0.792, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9074279379157428, + "eval_f1": 0.9004171098091903, + "eval_loss": 0.8030616044998169, + "eval_precision": 0.8960958785882981, + "eval_recall": 0.9074279379157428, + "eval_runtime": 7.9912, + "eval_samples_per_second": 451.498, + "eval_steps_per_second": 3.629, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 3.2940910994634065e-05, + "loss": 0.7885, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 3.243970837270151e-05, + "loss": 0.7777, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9056248715383994, + "eval_loss": 0.8066438436508179, + "eval_precision": 0.9017662157183216, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.1376, + "eval_samples_per_second": 443.373, + "eval_steps_per_second": 3.564, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 3.184863513197149e-05, + "loss": 0.7712, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.910503075140062, + "eval_loss": 0.7965097427368164, + "eval_precision": 0.9071623382442995, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.7164, + "eval_samples_per_second": 467.576, + "eval_steps_per_second": 3.758, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 3.117113405244401e-05, + "loss": 0.7766, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 3.0411151323873334e-05, + "loss": 0.7701, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.912323379125128, + "eval_loss": 0.7965329885482788, + "eval_precision": 0.9086968327576164, + "eval_recall": 0.9190687361419069, + "eval_runtime": 8.1521, + "eval_samples_per_second": 442.584, + "eval_steps_per_second": 3.557, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 2.9573113560692303e-05, + "loss": 0.7586, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9133083297926875, + "eval_loss": 0.7989745140075684, + "eval_precision": 0.9108040149566228, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.9122, + "eval_samples_per_second": 456.005, + "eval_steps_per_second": 3.665, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 2.8661902018643042e-05, + "loss": 0.7739, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 2.768282416329256e-05, + "loss": 0.7623, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9122268186163894, + "eval_loss": 0.7990837693214417, + "eval_precision": 0.9092123192603685, + "eval_recall": 0.9157427937915743, + "eval_runtime": 7.9197, + "eval_samples_per_second": 455.571, + "eval_steps_per_second": 3.662, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 2.664158275603587e-05, + "loss": 0.752, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 2.5544242637649144e-05, + "loss": 0.7542, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9102584143137299, + "eval_loss": 0.8025537133216858, + "eval_precision": 0.9072283465411942, + "eval_recall": 0.9138026607538803, + "eval_runtime": 8.1073, + "eval_samples_per_second": 445.032, + "eval_steps_per_second": 3.577, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 2.4397195402866275e-05, + "loss": 0.7499, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9099722476500224, + "eval_loss": 0.8010039925575256, + "eval_precision": 0.9073922355185179, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.1149, + "eval_samples_per_second": 444.612, + "eval_steps_per_second": 3.574, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 2.3207122171736127e-05, + "loss": 0.7511, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 2.1980954674603364e-05, + "loss": 0.7512, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.912092192514154, + "eval_loss": 0.7963768243789673, + "eval_precision": 0.9093645944675653, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.9919, + "eval_samples_per_second": 451.457, + "eval_steps_per_second": 3.629, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 2.0725834877378293e-05, + "loss": 0.7465, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 1.944907338226341e-05, + "loss": 0.7504, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9155331166207425, + "eval_loss": 0.7937688231468201, + "eval_precision": 0.9122034279467051, + "eval_recall": 0.9190687361419069, + "eval_runtime": 8.1294, + "eval_samples_per_second": 443.822, + "eval_steps_per_second": 3.567, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 1.8158106846236683e-05, + "loss": 0.7418, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9147375032517198, + "eval_loss": 0.7910325527191162, + "eval_precision": 0.9108467801837773, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.8647, + "eval_samples_per_second": 458.756, + "eval_steps_per_second": 3.687, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 1.686045466531312e-05, + "loss": 0.746, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 1.556367517688242e-05, + "loss": 0.7396, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9118930192007607, + "eval_loss": 0.7957426905632019, + "eval_precision": 0.9080113461993669, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.8851, + "eval_samples_per_second": 457.569, + "eval_steps_per_second": 3.678, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 1.4275321635227725e-05, + "loss": 0.7378, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 1.3002898216651585e-05, + "loss": 0.7369, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9139042938361129, + "eval_loss": 0.7959333658218384, + "eval_precision": 0.9108347139698648, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.1051, + "eval_samples_per_second": 445.149, + "eval_steps_per_second": 3.578, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 1.1753816310462735e-05, + "loss": 0.7355, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9103949936701875, + "eval_loss": 0.8054380416870117, + "eval_precision": 0.9084407745369095, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.0357, + "eval_samples_per_second": 448.997, + "eval_steps_per_second": 3.609, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 1.0535351350412314e-05, + "loss": 0.7347, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 9.354600438020275e-06, + "loss": 0.7362, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9163882489977903, + "eval_loss": 0.7922390103340149, + "eval_precision": 0.9136094059657969, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.8219, + "eval_samples_per_second": 461.268, + "eval_steps_per_second": 3.708, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 8.21844100462009e-06, + "loss": 0.7351, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9105055972339051, + "eval_loss": 0.8012438416481018, + "eval_precision": 0.9064071363486025, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.1515, + "eval_samples_per_second": 442.619, + "eval_steps_per_second": 3.558, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 7.133490752900026e-06, + "loss": 0.7337, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 6.1060691112663175e-06, + "loss": 0.7336, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9133015657971577, + "eval_loss": 0.7941232919692993, + "eval_precision": 0.9158980010099637, + "eval_recall": 0.9176829268292683, + "eval_runtime": 8.0563, + "eval_samples_per_second": 447.846, + "eval_steps_per_second": 3.6, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 5.142160425542207e-06, + "loss": 0.7309, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 4.247379102397426e-06, + "loss": 0.7332, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.915658478968544, + "eval_loss": 0.7949287295341492, + "eval_precision": 0.9117468789379483, + "eval_recall": 0.9207317073170732, + "eval_runtime": 7.9184, + "eval_samples_per_second": 455.647, + "eval_steps_per_second": 3.662, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 3.4269369075344934e-06, + "loss": 0.7318, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9240576496674058, + "eval_f1": 0.9191547933551557, + "eval_loss": 0.7948815226554871, + "eval_precision": 0.9153474010787129, + "eval_recall": 0.9240576496674058, + "eval_runtime": 8.189, + "eval_samples_per_second": 440.59, + "eval_steps_per_second": 3.541, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 2.6856126091076664e-06, + "loss": 0.7257, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 2.0277241431903165e-06, + "loss": 0.7323, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9215631929046563, + "eval_f1": 0.9174413322760164, + "eval_loss": 0.7940391898155212, + "eval_precision": 0.9140022437905087, + "eval_recall": 0.9215631929046563, + "eval_runtime": 7.93, + "eval_samples_per_second": 454.983, + "eval_steps_per_second": 3.657, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.45710346341632e-06, + "loss": 0.7328, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 9.770742212869535e-07, + "loss": 0.7271, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9123573295149654, + "eval_loss": 0.8007824420928955, + "eval_precision": 0.9092285637778, + "eval_recall": 0.9157427937915743, + "eval_runtime": 7.9472, + "eval_samples_per_second": 453.999, + "eval_steps_per_second": 3.649, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 5.90432407147145e-07, + "loss": 0.7289, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9215631929046563, + "eval_f1": 0.9174976723310397, + "eval_loss": 0.7948368787765503, + "eval_precision": 0.9137802618962579, + "eval_recall": 0.9215631929046563, + "eval_runtime": 8.1084, + "eval_samples_per_second": 444.969, + "eval_steps_per_second": 3.577, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 2.9943006459008216e-07, + "loss": 0.7326, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.0576217314769982e-07, + "loss": 0.7311, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9226718403547672, + "eval_f1": 0.9180960257477001, + "eval_loss": 0.7923139929771423, + "eval_precision": 0.9141630974424577, + "eval_recall": 0.9226718403547672, + "eval_runtime": 7.7777, + "eval_samples_per_second": 463.89, + "eval_steps_per_second": 3.729, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 1.0556775670458873e-08, + "loss": 0.729, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9131124863236949, + "eval_loss": 0.7974818348884583, + "eval_precision": 0.9156131620599434, + "eval_recall": 0.9174057649667405, + "eval_runtime": 7.999, + "eval_samples_per_second": 451.059, + "eval_steps_per_second": 3.625, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.2229288400535888, + "learning_rate": 3.4020565527021645e-05, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-622twmx8/checkpoint-630/training_args.bin b/run-622twmx8/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..22e4b23d63f6cf0330f33ec3e135d3f8fae4d1f1 --- /dev/null +++ b/run-622twmx8/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf5513b20a2b05fb35b2fa7de2262a999a106881d8dc1ce67434efb8bf85f630 +size 4792 diff --git a/run-78r1q4qm/checkpoint-616/model.safetensors b/run-78r1q4qm/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9286bd58c54d8e3240dffc42dc2111f5cc2779f --- /dev/null +++ b/run-78r1q4qm/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b561f83cae0a1068214d5b2632dce9633f69e1649de21a99dd01ec67a28fe69b +size 198025308 diff --git a/run-78r1q4qm/checkpoint-616/optimizer.pt b/run-78r1q4qm/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1463b766460cff6583cac4502bc0110def9fa3c --- /dev/null +++ b/run-78r1q4qm/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:114b5bee031bed27bdda8845b3a81789078b31689b355626486f8fff75e1b51e +size 395900602 diff --git a/run-78r1q4qm/checkpoint-616/rng_state.pth b/run-78r1q4qm/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-78r1q4qm/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-78r1q4qm/checkpoint-616/scheduler.pt b/run-78r1q4qm/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4375c00b6761511a8c6b59cfcef3dc94df7bbb9 --- /dev/null +++ b/run-78r1q4qm/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc4f9e78def26cc3eb3de31022ea4615a118c4c342772222df503a7bef30ed57 +size 1064 diff --git a/run-78r1q4qm/checkpoint-616/trainer_state.json b/run-78r1q4qm/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6bff89fe18b1ae4c4e46e4337a300dec06f664f8 --- /dev/null +++ b/run-78r1q4qm/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9197442578340399, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-78r1q4qm/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 7.547580751778858e-06, + "loss": 1.5048, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8190133037694013, + "eval_f1": 0.7497452884388607, + "eval_loss": 1.2520768642425537, + "eval_precision": 0.7047076004774175, + "eval_recall": 0.8190133037694013, + "eval_runtime": 7.6891, + "eval_samples_per_second": 469.236, + "eval_steps_per_second": 3.772, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 1.5095161503557717e-05, + "loss": 1.3056, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 2.2642742255336575e-05, + "loss": 1.0163, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8295454545454546, + "eval_f1": 0.7533478141085029, + "eval_loss": 0.9421566724777222, + "eval_precision": 0.7704949815553549, + "eval_recall": 0.8295454545454546, + "eval_runtime": 8.2111, + "eval_samples_per_second": 439.406, + "eval_steps_per_second": 3.532, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 3.0190323007115433e-05, + "loss": 0.9123, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8883037694013304, + "eval_f1": 0.8738292067198308, + "eval_loss": 0.8915470838546753, + "eval_precision": 0.8834896089031792, + "eval_recall": 0.8883037694013304, + "eval_runtime": 8.304, + "eval_samples_per_second": 434.491, + "eval_steps_per_second": 3.492, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 3.7737903758894295e-05, + "loss": 0.8733, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 4.528548451067315e-05, + "loss": 0.8338, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9060421286031042, + "eval_f1": 0.8984473037528438, + "eval_loss": 0.8255354762077332, + "eval_precision": 0.8961410730702706, + "eval_recall": 0.9060421286031042, + "eval_runtime": 8.2716, + "eval_samples_per_second": 436.191, + "eval_steps_per_second": 3.506, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 5.283306526245201e-05, + "loss": 0.8196, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 6.038064601423087e-05, + "loss": 0.7963, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8891352549889135, + "eval_f1": 0.8911238744934369, + "eval_loss": 0.8483710885047913, + "eval_precision": 0.8976337435064324, + "eval_recall": 0.8891352549889135, + "eval_runtime": 7.9134, + "eval_samples_per_second": 455.938, + "eval_steps_per_second": 3.665, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 6.792822676600973e-05, + "loss": 0.7787, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.910060370144538, + "eval_loss": 0.7995494604110718, + "eval_precision": 0.9079550086367597, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.3898, + "eval_samples_per_second": 488.239, + "eval_steps_per_second": 3.924, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 7.314210627041658e-05, + "loss": 0.7816, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 7.294830913523775e-05, + "loss": 0.7739, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9096704147420414, + "eval_loss": 0.8007507920265198, + "eval_precision": 0.9076401242744006, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.987, + "eval_samples_per_second": 451.736, + "eval_steps_per_second": 3.631, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 7.251581344671944e-05, + "loss": 0.7699, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 7.184745757808842e-05, + "loss": 0.7627, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9073982352056859, + "eval_loss": 0.80825275182724, + "eval_precision": 0.9048124953216002, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.6982, + "eval_samples_per_second": 468.679, + "eval_steps_per_second": 3.767, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 7.094762780059279e-05, + "loss": 0.7597, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.8995852547979453, + "eval_loss": 0.818570077419281, + "eval_precision": 0.9028495914589457, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.175, + "eval_samples_per_second": 441.344, + "eval_steps_per_second": 3.547, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 6.982222949737715e-05, + "loss": 0.7539, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 6.847864840776351e-05, + "loss": 0.7482, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.9064533775167782, + "eval_loss": 0.8048622012138367, + "eval_precision": 0.9026191824487212, + "eval_recall": 0.9107538802660754, + "eval_runtime": 8.0612, + "eval_samples_per_second": 447.578, + "eval_steps_per_second": 3.597, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 6.692570215628312e-05, + "loss": 0.7442, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9160941698994952, + "eval_loss": 0.8009539246559143, + "eval_precision": 0.9135236129747109, + "eval_recall": 0.9190687361419069, + "eval_runtime": 7.6856, + "eval_samples_per_second": 469.448, + "eval_steps_per_second": 3.773, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 6.517358238456349e-05, + "loss": 0.7423, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 6.323378786584592e-05, + "loss": 0.7349, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9086491186405646, + "eval_loss": 0.7993532419204712, + "eval_precision": 0.9055313602449556, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.387, + "eval_samples_per_second": 488.427, + "eval_steps_per_second": 3.926, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 6.111904904108862e-05, + "loss": 0.7387, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 5.884324447190766e-05, + "loss": 0.73, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9085059761133972, + "eval_loss": 0.8151747584342957, + "eval_precision": 0.9097424030014796, + "eval_recall": 0.9088137472283814, + "eval_runtime": 7.6869, + "eval_samples_per_second": 469.367, + "eval_steps_per_second": 3.773, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 5.642130975865698e-05, + "loss": 0.7357, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9136859749056642, + "eval_loss": 0.8036264777183533, + "eval_precision": 0.9104500256149863, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.6756, + "eval_samples_per_second": 470.061, + "eval_steps_per_second": 3.778, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 5.386913952139765e-05, + "loss": 0.7244, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 5.120348308703371e-05, + "loss": 0.7259, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9133321216641729, + "eval_loss": 0.7986628413200378, + "eval_precision": 0.9174428415229984, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.3452, + "eval_samples_per_second": 432.343, + "eval_steps_per_second": 3.475, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 4.844183456719662e-05, + "loss": 0.7235, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 4.560231804827269e-05, + "loss": 0.7242, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9080194516773318, + "eval_loss": 0.811267614364624, + "eval_precision": 0.911542493245158, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.9544, + "eval_samples_per_second": 453.585, + "eval_steps_per_second": 3.646, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 4.2703568647046315e-05, + "loss": 0.7194, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9106013333864512, + "eval_loss": 0.811810314655304, + "eval_precision": 0.9126409383553065, + "eval_recall": 0.9110310421286031, + "eval_runtime": 7.8199, + "eval_samples_per_second": 461.386, + "eval_steps_per_second": 3.708, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 3.976461021256375e-05, + "loss": 0.7178, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 3.680473047683355e-05, + "loss": 0.7161, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.9082044031203763, + "eval_loss": 0.8140824437141418, + "eval_precision": 0.9079606401326834, + "eval_recall": 0.9093680709534369, + "eval_runtime": 8.2504, + "eval_samples_per_second": 437.313, + "eval_steps_per_second": 3.515, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 3.384335447372168e-05, + "loss": 0.7167, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 3.089991705676482e-05, + "loss": 0.7118, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9093533512159098, + "eval_loss": 0.8029122948646545, + "eval_precision": 0.9105466254553665, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.8636, + "eval_samples_per_second": 458.821, + "eval_steps_per_second": 3.688, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 2.7993735352539614e-05, + "loss": 0.7161, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9077716625995479, + "eval_loss": 0.8046334981918335, + "eval_precision": 0.9045966991382139, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.8012, + "eval_samples_per_second": 462.496, + "eval_steps_per_second": 3.717, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 2.5143881986647538e-05, + "loss": 0.7082, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 2.2369059914305717e-05, + "loss": 0.7044, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9116858371937434, + "eval_loss": 0.8082920908927917, + "eval_precision": 0.9106773171645975, + "eval_recall": 0.9171286031042128, + "eval_runtime": 8.2003, + "eval_samples_per_second": 439.982, + "eval_steps_per_second": 3.536, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 1.968747967700222e-05, + "loss": 0.7116, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.908676208972142, + "eval_loss": 0.8065844774246216, + "eval_precision": 0.9076234173489904, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.1512, + "eval_samples_per_second": 442.632, + "eval_steps_per_second": 3.558, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 1.7116739890753415e-05, + "loss": 0.7076, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 1.467371175029252e-05, + "loss": 0.7056, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9125632731403359, + "eval_loss": 0.8065965175628662, + "eval_precision": 0.9105932192170725, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.0289, + "eval_samples_per_second": 449.377, + "eval_steps_per_second": 3.612, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.2374428307162488e-05, + "loss": 0.7057, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.0233979248356657e-05, + "loss": 0.7047, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9188440242001118, + "eval_loss": 0.7959193587303162, + "eval_precision": 0.9172684812663667, + "eval_recall": 0.9223946784922394, + "eval_runtime": 7.8926, + "eval_samples_per_second": 457.136, + "eval_steps_per_second": 3.674, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 8.266411866051708e-06, + "loss": 0.7038, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9142579592282161, + "eval_loss": 0.8025432825088501, + "eval_precision": 0.9125897346575084, + "eval_recall": 0.9207317073170732, + "eval_runtime": 7.9269, + "eval_samples_per_second": 455.158, + "eval_steps_per_second": 3.658, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 6.484638868346346e-06, + "loss": 0.7038, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 4.900353636023589e-06, + "loss": 0.7008, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9226718403547672, + "eval_f1": 0.9175067606554699, + "eval_loss": 0.7975662350654602, + "eval_precision": 0.9162146661115457, + "eval_recall": 0.9226718403547672, + "eval_runtime": 7.9962, + "eval_samples_per_second": 451.213, + "eval_steps_per_second": 3.627, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 3.5239534814876163e-06, + "loss": 0.7026, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 2.364471413509736e-06, + "loss": 0.7001, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9139860019306476, + "eval_loss": 0.8013781905174255, + "eval_precision": 0.9114583173654625, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.9888, + "eval_samples_per_second": 451.63, + "eval_steps_per_second": 3.63, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.4295168555964337e-06, + "loss": 0.7027, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9139427472468534, + "eval_loss": 0.8011085987091064, + "eval_precision": 0.9121897348256023, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.813, + "eval_samples_per_second": 461.793, + "eval_steps_per_second": 3.712, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 7.252257070314998e-07, + "loss": 0.7032, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 2.562200743306797e-07, + "loss": 0.7031, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9246119733924612, + "eval_f1": 0.9197442578340399, + "eval_loss": 0.7968980073928833, + "eval_precision": 0.9164694498984551, + "eval_recall": 0.9246119733924612, + "eval_runtime": 7.5834, + "eval_samples_per_second": 475.773, + "eval_steps_per_second": 3.824, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.0975529012840335, + "learning_rate": 7.31534749787797e-05, + "metric": "eval/loss", + "weight_decay": 0.1703233874520093 + } +} diff --git a/run-78r1q4qm/checkpoint-616/training_args.bin b/run-78r1q4qm/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5cd2a476e9c740a2536bacfe23cbfed34b33a877 --- /dev/null +++ b/run-78r1q4qm/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77f180ec07ea427ceec2d2c9fde798cb16d8f09b659616fa9a21422bfcb15ffd +size 4792 diff --git a/run-78r1q4qm/checkpoint-630/model.safetensors b/run-78r1q4qm/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fcbeb642e8b4981c1607814c502ceb475e31fdbe --- /dev/null +++ b/run-78r1q4qm/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fbc77fec636755ce2fe10666bff7520ec2cbf6fdab37e491e50cb3aaba26655 +size 198025308 diff --git a/run-78r1q4qm/checkpoint-630/optimizer.pt b/run-78r1q4qm/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bdb0f5731a69df8ec284988e9d5503c53b6708ca --- /dev/null +++ b/run-78r1q4qm/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf64adbc14654393ca80aa738094c87db9cd4f1b41a0fa7d492067747d86ddfb +size 395900602 diff --git a/run-78r1q4qm/checkpoint-630/rng_state.pth b/run-78r1q4qm/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-78r1q4qm/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-78r1q4qm/checkpoint-630/scheduler.pt b/run-78r1q4qm/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4a35be6f8bca4a6795ba2e23835dfdf5a35665a --- /dev/null +++ b/run-78r1q4qm/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dd2fc4977e5268c3d9fb99bc857709735487a3d49707a5743749b4156e8101b +size 1064 diff --git a/run-78r1q4qm/checkpoint-630/trainer_state.json b/run-78r1q4qm/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..18c41d55d4b0ae37c20e9f695906e751e9eb8393 --- /dev/null +++ b/run-78r1q4qm/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9197442578340399, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-78r1q4qm/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 7.547580751778858e-06, + "loss": 1.5048, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8190133037694013, + "eval_f1": 0.7497452884388607, + "eval_loss": 1.2520768642425537, + "eval_precision": 0.7047076004774175, + "eval_recall": 0.8190133037694013, + "eval_runtime": 7.6891, + "eval_samples_per_second": 469.236, + "eval_steps_per_second": 3.772, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 1.5095161503557717e-05, + "loss": 1.3056, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 2.2642742255336575e-05, + "loss": 1.0163, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8295454545454546, + "eval_f1": 0.7533478141085029, + "eval_loss": 0.9421566724777222, + "eval_precision": 0.7704949815553549, + "eval_recall": 0.8295454545454546, + "eval_runtime": 8.2111, + "eval_samples_per_second": 439.406, + "eval_steps_per_second": 3.532, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 3.0190323007115433e-05, + "loss": 0.9123, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8883037694013304, + "eval_f1": 0.8738292067198308, + "eval_loss": 0.8915470838546753, + "eval_precision": 0.8834896089031792, + "eval_recall": 0.8883037694013304, + "eval_runtime": 8.304, + "eval_samples_per_second": 434.491, + "eval_steps_per_second": 3.492, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 3.7737903758894295e-05, + "loss": 0.8733, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 4.528548451067315e-05, + "loss": 0.8338, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9060421286031042, + "eval_f1": 0.8984473037528438, + "eval_loss": 0.8255354762077332, + "eval_precision": 0.8961410730702706, + "eval_recall": 0.9060421286031042, + "eval_runtime": 8.2716, + "eval_samples_per_second": 436.191, + "eval_steps_per_second": 3.506, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 5.283306526245201e-05, + "loss": 0.8196, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 6.038064601423087e-05, + "loss": 0.7963, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8891352549889135, + "eval_f1": 0.8911238744934369, + "eval_loss": 0.8483710885047913, + "eval_precision": 0.8976337435064324, + "eval_recall": 0.8891352549889135, + "eval_runtime": 7.9134, + "eval_samples_per_second": 455.938, + "eval_steps_per_second": 3.665, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 6.792822676600973e-05, + "loss": 0.7787, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.910060370144538, + "eval_loss": 0.7995494604110718, + "eval_precision": 0.9079550086367597, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.3898, + "eval_samples_per_second": 488.239, + "eval_steps_per_second": 3.924, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 7.314210627041658e-05, + "loss": 0.7816, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 7.294830913523775e-05, + "loss": 0.7739, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9096704147420414, + "eval_loss": 0.8007507920265198, + "eval_precision": 0.9076401242744006, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.987, + "eval_samples_per_second": 451.736, + "eval_steps_per_second": 3.631, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 7.251581344671944e-05, + "loss": 0.7699, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 7.184745757808842e-05, + "loss": 0.7627, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9073982352056859, + "eval_loss": 0.80825275182724, + "eval_precision": 0.9048124953216002, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.6982, + "eval_samples_per_second": 468.679, + "eval_steps_per_second": 3.767, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 7.094762780059279e-05, + "loss": 0.7597, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.8995852547979453, + "eval_loss": 0.818570077419281, + "eval_precision": 0.9028495914589457, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.175, + "eval_samples_per_second": 441.344, + "eval_steps_per_second": 3.547, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 6.982222949737715e-05, + "loss": 0.7539, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 6.847864840776351e-05, + "loss": 0.7482, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.9064533775167782, + "eval_loss": 0.8048622012138367, + "eval_precision": 0.9026191824487212, + "eval_recall": 0.9107538802660754, + "eval_runtime": 8.0612, + "eval_samples_per_second": 447.578, + "eval_steps_per_second": 3.597, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 6.692570215628312e-05, + "loss": 0.7442, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9160941698994952, + "eval_loss": 0.8009539246559143, + "eval_precision": 0.9135236129747109, + "eval_recall": 0.9190687361419069, + "eval_runtime": 7.6856, + "eval_samples_per_second": 469.448, + "eval_steps_per_second": 3.773, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 6.517358238456349e-05, + "loss": 0.7423, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 6.323378786584592e-05, + "loss": 0.7349, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9086491186405646, + "eval_loss": 0.7993532419204712, + "eval_precision": 0.9055313602449556, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.387, + "eval_samples_per_second": 488.427, + "eval_steps_per_second": 3.926, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 6.111904904108862e-05, + "loss": 0.7387, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 5.884324447190766e-05, + "loss": 0.73, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9085059761133972, + "eval_loss": 0.8151747584342957, + "eval_precision": 0.9097424030014796, + "eval_recall": 0.9088137472283814, + "eval_runtime": 7.6869, + "eval_samples_per_second": 469.367, + "eval_steps_per_second": 3.773, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 5.642130975865698e-05, + "loss": 0.7357, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9136859749056642, + "eval_loss": 0.8036264777183533, + "eval_precision": 0.9104500256149863, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.6756, + "eval_samples_per_second": 470.061, + "eval_steps_per_second": 3.778, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 5.386913952139765e-05, + "loss": 0.7244, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 5.120348308703371e-05, + "loss": 0.7259, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9133321216641729, + "eval_loss": 0.7986628413200378, + "eval_precision": 0.9174428415229984, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.3452, + "eval_samples_per_second": 432.343, + "eval_steps_per_second": 3.475, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 4.844183456719662e-05, + "loss": 0.7235, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 4.560231804827269e-05, + "loss": 0.7242, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9080194516773318, + "eval_loss": 0.811267614364624, + "eval_precision": 0.911542493245158, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.9544, + "eval_samples_per_second": 453.585, + "eval_steps_per_second": 3.646, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 4.2703568647046315e-05, + "loss": 0.7194, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9106013333864512, + "eval_loss": 0.811810314655304, + "eval_precision": 0.9126409383553065, + "eval_recall": 0.9110310421286031, + "eval_runtime": 7.8199, + "eval_samples_per_second": 461.386, + "eval_steps_per_second": 3.708, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 3.976461021256375e-05, + "loss": 0.7178, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 3.680473047683355e-05, + "loss": 0.7161, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.9082044031203763, + "eval_loss": 0.8140824437141418, + "eval_precision": 0.9079606401326834, + "eval_recall": 0.9093680709534369, + "eval_runtime": 8.2504, + "eval_samples_per_second": 437.313, + "eval_steps_per_second": 3.515, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 3.384335447372168e-05, + "loss": 0.7167, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 3.089991705676482e-05, + "loss": 0.7118, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9093533512159098, + "eval_loss": 0.8029122948646545, + "eval_precision": 0.9105466254553665, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.8636, + "eval_samples_per_second": 458.821, + "eval_steps_per_second": 3.688, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 2.7993735352539614e-05, + "loss": 0.7161, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9077716625995479, + "eval_loss": 0.8046334981918335, + "eval_precision": 0.9045966991382139, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.8012, + "eval_samples_per_second": 462.496, + "eval_steps_per_second": 3.717, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 2.5143881986647538e-05, + "loss": 0.7082, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 2.2369059914305717e-05, + "loss": 0.7044, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9116858371937434, + "eval_loss": 0.8082920908927917, + "eval_precision": 0.9106773171645975, + "eval_recall": 0.9171286031042128, + "eval_runtime": 8.2003, + "eval_samples_per_second": 439.982, + "eval_steps_per_second": 3.536, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 1.968747967700222e-05, + "loss": 0.7116, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.908676208972142, + "eval_loss": 0.8065844774246216, + "eval_precision": 0.9076234173489904, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.1512, + "eval_samples_per_second": 442.632, + "eval_steps_per_second": 3.558, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 1.7116739890753415e-05, + "loss": 0.7076, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 1.467371175029252e-05, + "loss": 0.7056, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9125632731403359, + "eval_loss": 0.8065965175628662, + "eval_precision": 0.9105932192170725, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.0289, + "eval_samples_per_second": 449.377, + "eval_steps_per_second": 3.612, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.2374428307162488e-05, + "loss": 0.7057, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.0233979248356657e-05, + "loss": 0.7047, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9188440242001118, + "eval_loss": 0.7959193587303162, + "eval_precision": 0.9172684812663667, + "eval_recall": 0.9223946784922394, + "eval_runtime": 7.8926, + "eval_samples_per_second": 457.136, + "eval_steps_per_second": 3.674, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 8.266411866051708e-06, + "loss": 0.7038, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9142579592282161, + "eval_loss": 0.8025432825088501, + "eval_precision": 0.9125897346575084, + "eval_recall": 0.9207317073170732, + "eval_runtime": 7.9269, + "eval_samples_per_second": 455.158, + "eval_steps_per_second": 3.658, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 6.484638868346346e-06, + "loss": 0.7038, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 4.900353636023589e-06, + "loss": 0.7008, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9226718403547672, + "eval_f1": 0.9175067606554699, + "eval_loss": 0.7975662350654602, + "eval_precision": 0.9162146661115457, + "eval_recall": 0.9226718403547672, + "eval_runtime": 7.9962, + "eval_samples_per_second": 451.213, + "eval_steps_per_second": 3.627, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 3.5239534814876163e-06, + "loss": 0.7026, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 2.364471413509736e-06, + "loss": 0.7001, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9139860019306476, + "eval_loss": 0.8013781905174255, + "eval_precision": 0.9114583173654625, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.9888, + "eval_samples_per_second": 451.63, + "eval_steps_per_second": 3.63, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.4295168555964337e-06, + "loss": 0.7027, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9139427472468534, + "eval_loss": 0.8011085987091064, + "eval_precision": 0.9121897348256023, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.813, + "eval_samples_per_second": 461.793, + "eval_steps_per_second": 3.712, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 7.252257070314998e-07, + "loss": 0.7032, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 2.562200743306797e-07, + "loss": 0.7031, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9246119733924612, + "eval_f1": 0.9197442578340399, + "eval_loss": 0.7968980073928833, + "eval_precision": 0.9164694498984551, + "eval_recall": 0.9246119733924612, + "eval_runtime": 7.5834, + "eval_samples_per_second": 475.773, + "eval_steps_per_second": 3.824, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 2.5577937382331434e-08, + "loss": 0.7043, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9163540755173245, + "eval_loss": 0.8023125529289246, + "eval_precision": 0.9149826551274394, + "eval_recall": 0.9210088691796009, + "eval_runtime": 7.705, + "eval_samples_per_second": 468.27, + "eval_steps_per_second": 3.764, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.0975529012840335, + "learning_rate": 7.31534749787797e-05, + "metric": "eval/loss", + "weight_decay": 0.1703233874520093 + } +} diff --git a/run-78r1q4qm/checkpoint-630/training_args.bin b/run-78r1q4qm/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5cd2a476e9c740a2536bacfe23cbfed34b33a877 --- /dev/null +++ b/run-78r1q4qm/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77f180ec07ea427ceec2d2c9fde798cb16d8f09b659616fa9a21422bfcb15ffd +size 4792 diff --git a/run-7p8ncfyp/checkpoint-1232/model.safetensors b/run-7p8ncfyp/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69bc0ba1eeb94eb847b32526ffd03691f1f723b0 --- /dev/null +++ b/run-7p8ncfyp/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fe6ac006a19f89d98f929275f891443837741f72170af39844f785cb50dad94 +size 198025308 diff --git a/run-7p8ncfyp/checkpoint-1232/optimizer.pt b/run-7p8ncfyp/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6cc525a325aec148830b0f924eeb13f3c1ac458e --- /dev/null +++ b/run-7p8ncfyp/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315c68e5b6fa885d01f0c1748d76f6b113a950c1d358a03f43a4205cedc1284a +size 395900602 diff --git a/run-7p8ncfyp/checkpoint-1232/rng_state.pth b/run-7p8ncfyp/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-7p8ncfyp/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-7p8ncfyp/checkpoint-1232/scheduler.pt b/run-7p8ncfyp/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..822b847eb85840abb471b22d5afe5111ab50564b --- /dev/null +++ b/run-7p8ncfyp/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77df2cdc16c1f8370e61d1a9bda69a1222981e4ce153efbd9ed8773abd19c4d6 +size 1064 diff --git a/run-7p8ncfyp/checkpoint-1232/trainer_state.json b/run-7p8ncfyp/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f99cb4dcae742d87ad4d308fbf87ece1ec9df3a1 --- /dev/null +++ b/run-7p8ncfyp/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9259977827050998, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-7p8ncfyp/checkpoint-1020", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.2818858201246763e-05, + "loss": 1.4276, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_loss": 0.9711073040962219, + "eval_runtime": 6.8742, + "eval_samples_per_second": 524.858, + "eval_steps_per_second": 8.292, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 2.5637716402493526e-05, + "loss": 1.0106, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 3.8456574603740286e-05, + "loss": 0.8931, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8977272727272727, + "eval_loss": 0.8576710224151611, + "eval_runtime": 6.8248, + "eval_samples_per_second": 528.663, + "eval_steps_per_second": 8.352, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 5.127543280498705e-05, + "loss": 0.8383, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8083691000938416, + "eval_runtime": 6.9534, + "eval_samples_per_second": 518.885, + "eval_steps_per_second": 8.197, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 6.409429100623381e-05, + "loss": 0.8136, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 7.691314920748057e-05, + "loss": 0.7976, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9215631929046563, + "eval_loss": 0.7977318167686462, + "eval_runtime": 6.8833, + "eval_samples_per_second": 524.167, + "eval_steps_per_second": 8.281, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 8.973200740872735e-05, + "loss": 0.787, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001025508656099741, + "loss": 0.7793, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9232261640798226, + "eval_loss": 0.791247546672821, + "eval_runtime": 6.876, + "eval_samples_per_second": 524.727, + "eval_steps_per_second": 8.29, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011536972381122085, + "loss": 0.7779, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9207317073170732, + "eval_loss": 0.7992942333221436, + "eval_runtime": 6.7623, + "eval_samples_per_second": 533.546, + "eval_steps_per_second": 8.429, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00012818858201246763, + "loss": 0.7673, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001410074402137144, + "loss": 0.7672, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9038248337028825, + "eval_loss": 0.8262019753456116, + "eval_runtime": 6.6985, + "eval_samples_per_second": 538.628, + "eval_steps_per_second": 8.509, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00015382629841496115, + "loss": 0.759, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00016664515661620792, + "loss": 0.7489, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8907982261640798, + "eval_loss": 0.8478919267654419, + "eval_runtime": 6.956, + "eval_samples_per_second": 518.69, + "eval_steps_per_second": 8.194, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0001794640148174547, + "loss": 0.7478, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8152016401290894, + "eval_runtime": 6.7524, + "eval_samples_per_second": 534.329, + "eval_steps_per_second": 8.441, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00019228287301870144, + "loss": 0.7573, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0002051017312199482, + "loss": 0.7466, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9065964523281597, + "eval_loss": 0.8191770315170288, + "eval_runtime": 6.9724, + "eval_samples_per_second": 517.472, + "eval_steps_per_second": 8.175, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00020776765088937024, + "loss": 0.7425, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8126404881477356, + "eval_runtime": 6.7328, + "eval_samples_per_second": 535.88, + "eval_steps_per_second": 8.466, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00020651688186677685, + "loss": 0.7487, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00020429317977993744, + "loss": 0.7336, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.746119733924612, + "eval_loss": 1.066055417060852, + "eval_runtime": 6.7552, + "eval_samples_per_second": 534.109, + "eval_steps_per_second": 8.438, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0002011176547699678, + "loss": 0.7377, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00019702045286293574, + "loss": 0.7275, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8982815964523282, + "eval_loss": 0.8417561650276184, + "eval_runtime": 6.7751, + "eval_samples_per_second": 532.536, + "eval_steps_per_second": 8.413, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0001920404697863531, + "loss": 0.7242, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8305643796920776, + "eval_runtime": 7.1353, + "eval_samples_per_second": 505.655, + "eval_steps_per_second": 7.988, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00018622498172263786, + "loss": 0.73, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00017962919650489352, + "loss": 0.7154, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.8019810914993286, + "eval_runtime": 6.6701, + "eval_samples_per_second": 540.924, + "eval_steps_per_second": 8.546, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00017231572951561204, + "loss": 0.7204, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0001643540092637213, + "loss": 0.7161, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8921840354767184, + "eval_loss": 0.848892331123352, + "eval_runtime": 6.6483, + "eval_samples_per_second": 542.696, + "eval_steps_per_second": 8.574, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0001558196182829772, + "loss": 0.7193, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8188620209693909, + "eval_runtime": 6.5582, + "eval_samples_per_second": 550.151, + "eval_steps_per_second": 8.691, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00014679357560871124, + "loss": 0.7134, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0001373615676445556, + "loss": 0.7062, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8058794736862183, + "eval_runtime": 6.7997, + "eval_samples_per_second": 530.609, + "eval_steps_per_second": 8.383, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00012761313472071305, + "loss": 0.7101, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.000117640821065971, + "loss": 0.7049, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8060556054115295, + "eval_runtime": 6.678, + "eval_samples_per_second": 540.281, + "eval_steps_per_second": 8.535, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00010753929626298095, + "loss": 0.6954, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8048175573348999, + "eval_runtime": 6.7207, + "eval_samples_per_second": 536.847, + "eval_steps_per_second": 8.481, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 9.740445652703957e-05, + "loss": 0.7029, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 8.73325143401504e-05, + "loss": 0.7003, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.905210643015521, + "eval_loss": 0.8217228055000305, + "eval_runtime": 6.8899, + "eval_samples_per_second": 523.668, + "eval_steps_per_second": 8.273, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 7.741908508268893e-05, + "loss": 0.6958, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.8045816421508789, + "eval_runtime": 6.7015, + "eval_samples_per_second": 538.388, + "eval_steps_per_second": 8.506, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 6.775827933349775e-05, + "loss": 0.6965, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 5.8441809455426297e-05, + "loss": 0.6948, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.8058636784553528, + "eval_runtime": 7.0224, + "eval_samples_per_second": 513.786, + "eval_steps_per_second": 8.117, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 4.955811894771425e-05, + "loss": 0.6949, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 4.119154283048704e-05, + "loss": 0.6914, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9259977827050998, + "eval_loss": 0.7917850017547607, + "eval_runtime": 7.1869, + "eval_samples_per_second": 502.024, + "eval_steps_per_second": 7.931, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 3.3421507032035854e-05, + "loss": 0.698, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8053988218307495, + "eval_runtime": 6.7735, + "eval_samples_per_second": 532.667, + "eval_steps_per_second": 8.415, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 2.6321774379292607e-05, + "loss": 0.6885, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 1.9959744349494178e-05, + "loss": 0.6884, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9257206208425721, + "eval_loss": 0.7929933071136475, + "eval_runtime": 7.0069, + "eval_samples_per_second": 514.92, + "eval_steps_per_second": 8.135, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 1.439581323066243e-05, + "loss": 0.6892, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 9.682800765051576e-06, + "loss": 0.6866, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9215631929046563, + "eval_loss": 0.7981271147727966, + "eval_runtime": 6.7443, + "eval_samples_per_second": 534.969, + "eval_steps_per_second": 8.452, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 5.865448718574433e-06, + "loss": 0.6868, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9218403547671841, + "eval_loss": 0.7982448935508728, + "eval_runtime": 6.8979, + "eval_samples_per_second": 523.059, + "eval_steps_per_second": 8.263, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 2.9799961364100053e-06, + "loss": 0.6912, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.0538353169929005e-06, + "loss": 0.6906, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9223946784922394, + "eval_loss": 0.7976700663566589, + "eval_runtime": 7.096, + "eval_samples_per_second": 508.458, + "eval_steps_per_second": 8.033, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00020805992926638977, + "metric": "eval/loss", + "warmup_ratio": 0.3344556375512744 + } +} diff --git a/run-7p8ncfyp/checkpoint-1232/training_args.bin b/run-7p8ncfyp/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fa3981b6eb92e79fa493138ec1c94f2de69df06 --- /dev/null +++ b/run-7p8ncfyp/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bff673ead6d8bb290bfac3f8f204eb1e158820bbe3d3564f9920e28dfacc556d +size 4792 diff --git a/run-7p8ncfyp/checkpoint-1260/model.safetensors b/run-7p8ncfyp/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..513328b111d5b2c5bbf775eae7a8752765aaca00 --- /dev/null +++ b/run-7p8ncfyp/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:894c2d98011c9567fb918dae3ca81305b523af6b0d7e1f6eac049fd4aa5c578b +size 198025308 diff --git a/run-7p8ncfyp/checkpoint-1260/optimizer.pt b/run-7p8ncfyp/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..68ff136083cbbdbe2e4f593d683ede8702361588 --- /dev/null +++ b/run-7p8ncfyp/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1568896af0332b0943a38600e696cc1f526b61b3eac4af21958698712eabd674 +size 395900602 diff --git a/run-7p8ncfyp/checkpoint-1260/rng_state.pth b/run-7p8ncfyp/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-7p8ncfyp/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-7p8ncfyp/checkpoint-1260/scheduler.pt b/run-7p8ncfyp/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b36fc183fd2ba20e6bcfea83b0e7d340cbd234ae --- /dev/null +++ b/run-7p8ncfyp/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3193be2af46b1d091f3e595dee62e531f274aaefcf8b5f3eb3697f849837783c +size 1064 diff --git a/run-7p8ncfyp/checkpoint-1260/trainer_state.json b/run-7p8ncfyp/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cc72837a89bb5c7885abb436a76b1df0d3a9c36a --- /dev/null +++ b/run-7p8ncfyp/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.926829268292683, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-7p8ncfyp/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.2818858201246763e-05, + "loss": 1.4276, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_loss": 0.9711073040962219, + "eval_runtime": 6.8742, + "eval_samples_per_second": 524.858, + "eval_steps_per_second": 8.292, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 2.5637716402493526e-05, + "loss": 1.0106, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 3.8456574603740286e-05, + "loss": 0.8931, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8977272727272727, + "eval_loss": 0.8576710224151611, + "eval_runtime": 6.8248, + "eval_samples_per_second": 528.663, + "eval_steps_per_second": 8.352, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 5.127543280498705e-05, + "loss": 0.8383, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8083691000938416, + "eval_runtime": 6.9534, + "eval_samples_per_second": 518.885, + "eval_steps_per_second": 8.197, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 6.409429100623381e-05, + "loss": 0.8136, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 7.691314920748057e-05, + "loss": 0.7976, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9215631929046563, + "eval_loss": 0.7977318167686462, + "eval_runtime": 6.8833, + "eval_samples_per_second": 524.167, + "eval_steps_per_second": 8.281, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 8.973200740872735e-05, + "loss": 0.787, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001025508656099741, + "loss": 0.7793, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9232261640798226, + "eval_loss": 0.791247546672821, + "eval_runtime": 6.876, + "eval_samples_per_second": 524.727, + "eval_steps_per_second": 8.29, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011536972381122085, + "loss": 0.7779, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9207317073170732, + "eval_loss": 0.7992942333221436, + "eval_runtime": 6.7623, + "eval_samples_per_second": 533.546, + "eval_steps_per_second": 8.429, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00012818858201246763, + "loss": 0.7673, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001410074402137144, + "loss": 0.7672, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9038248337028825, + "eval_loss": 0.8262019753456116, + "eval_runtime": 6.6985, + "eval_samples_per_second": 538.628, + "eval_steps_per_second": 8.509, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00015382629841496115, + "loss": 0.759, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00016664515661620792, + "loss": 0.7489, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8907982261640798, + "eval_loss": 0.8478919267654419, + "eval_runtime": 6.956, + "eval_samples_per_second": 518.69, + "eval_steps_per_second": 8.194, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0001794640148174547, + "loss": 0.7478, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8152016401290894, + "eval_runtime": 6.7524, + "eval_samples_per_second": 534.329, + "eval_steps_per_second": 8.441, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00019228287301870144, + "loss": 0.7573, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0002051017312199482, + "loss": 0.7466, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9065964523281597, + "eval_loss": 0.8191770315170288, + "eval_runtime": 6.9724, + "eval_samples_per_second": 517.472, + "eval_steps_per_second": 8.175, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00020776765088937024, + "loss": 0.7425, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8126404881477356, + "eval_runtime": 6.7328, + "eval_samples_per_second": 535.88, + "eval_steps_per_second": 8.466, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00020651688186677685, + "loss": 0.7487, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00020429317977993744, + "loss": 0.7336, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.746119733924612, + "eval_loss": 1.066055417060852, + "eval_runtime": 6.7552, + "eval_samples_per_second": 534.109, + "eval_steps_per_second": 8.438, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0002011176547699678, + "loss": 0.7377, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00019702045286293574, + "loss": 0.7275, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8982815964523282, + "eval_loss": 0.8417561650276184, + "eval_runtime": 6.7751, + "eval_samples_per_second": 532.536, + "eval_steps_per_second": 8.413, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0001920404697863531, + "loss": 0.7242, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8305643796920776, + "eval_runtime": 7.1353, + "eval_samples_per_second": 505.655, + "eval_steps_per_second": 7.988, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00018622498172263786, + "loss": 0.73, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00017962919650489352, + "loss": 0.7154, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.8019810914993286, + "eval_runtime": 6.6701, + "eval_samples_per_second": 540.924, + "eval_steps_per_second": 8.546, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00017231572951561204, + "loss": 0.7204, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0001643540092637213, + "loss": 0.7161, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8921840354767184, + "eval_loss": 0.848892331123352, + "eval_runtime": 6.6483, + "eval_samples_per_second": 542.696, + "eval_steps_per_second": 8.574, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0001558196182829772, + "loss": 0.7193, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8188620209693909, + "eval_runtime": 6.5582, + "eval_samples_per_second": 550.151, + "eval_steps_per_second": 8.691, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00014679357560871124, + "loss": 0.7134, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0001373615676445556, + "loss": 0.7062, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8058794736862183, + "eval_runtime": 6.7997, + "eval_samples_per_second": 530.609, + "eval_steps_per_second": 8.383, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00012761313472071305, + "loss": 0.7101, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.000117640821065971, + "loss": 0.7049, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8060556054115295, + "eval_runtime": 6.678, + "eval_samples_per_second": 540.281, + "eval_steps_per_second": 8.535, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00010753929626298095, + "loss": 0.6954, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8048175573348999, + "eval_runtime": 6.7207, + "eval_samples_per_second": 536.847, + "eval_steps_per_second": 8.481, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 9.740445652703957e-05, + "loss": 0.7029, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 8.73325143401504e-05, + "loss": 0.7003, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.905210643015521, + "eval_loss": 0.8217228055000305, + "eval_runtime": 6.8899, + "eval_samples_per_second": 523.668, + "eval_steps_per_second": 8.273, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 7.741908508268893e-05, + "loss": 0.6958, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.8045816421508789, + "eval_runtime": 6.7015, + "eval_samples_per_second": 538.388, + "eval_steps_per_second": 8.506, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 6.775827933349775e-05, + "loss": 0.6965, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 5.8441809455426297e-05, + "loss": 0.6948, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.8058636784553528, + "eval_runtime": 7.0224, + "eval_samples_per_second": 513.786, + "eval_steps_per_second": 8.117, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 4.955811894771425e-05, + "loss": 0.6949, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 4.119154283048704e-05, + "loss": 0.6914, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9259977827050998, + "eval_loss": 0.7917850017547607, + "eval_runtime": 7.1869, + "eval_samples_per_second": 502.024, + "eval_steps_per_second": 7.931, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 3.3421507032035854e-05, + "loss": 0.698, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8053988218307495, + "eval_runtime": 6.7735, + "eval_samples_per_second": 532.667, + "eval_steps_per_second": 8.415, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 2.6321774379292607e-05, + "loss": 0.6885, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 1.9959744349494178e-05, + "loss": 0.6884, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9257206208425721, + "eval_loss": 0.7929933071136475, + "eval_runtime": 7.0069, + "eval_samples_per_second": 514.92, + "eval_steps_per_second": 8.135, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 1.439581323066243e-05, + "loss": 0.6892, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 9.682800765051576e-06, + "loss": 0.6866, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9215631929046563, + "eval_loss": 0.7981271147727966, + "eval_runtime": 6.7443, + "eval_samples_per_second": 534.969, + "eval_steps_per_second": 8.452, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 5.865448718574433e-06, + "loss": 0.6868, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9218403547671841, + "eval_loss": 0.7982448935508728, + "eval_runtime": 6.8979, + "eval_samples_per_second": 523.059, + "eval_steps_per_second": 8.263, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 2.9799961364100053e-06, + "loss": 0.6912, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.0538353169929005e-06, + "loss": 0.6906, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9223946784922394, + "eval_loss": 0.7976700663566589, + "eval_runtime": 7.096, + "eval_samples_per_second": 508.458, + "eval_steps_per_second": 8.033, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 1.0525177030672374e-07, + "loss": 0.6869, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.926829268292683, + "eval_loss": 0.7968323826789856, + "eval_runtime": 6.8314, + "eval_samples_per_second": 528.152, + "eval_steps_per_second": 8.344, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00020805992926638977, + "metric": "eval/loss", + "warmup_ratio": 0.3344556375512744 + } +} diff --git a/run-7p8ncfyp/checkpoint-1260/training_args.bin b/run-7p8ncfyp/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fa3981b6eb92e79fa493138ec1c94f2de69df06 --- /dev/null +++ b/run-7p8ncfyp/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bff673ead6d8bb290bfac3f8f204eb1e158820bbe3d3564f9920e28dfacc556d +size 4792 diff --git a/run-81q5vyg9/checkpoint-1147/model.safetensors b/run-81q5vyg9/checkpoint-1147/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f20c43c3923bbcf694ec408fb6d0f4ec8eef6cc0 --- /dev/null +++ b/run-81q5vyg9/checkpoint-1147/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:228b1c799b656d470095958691ab46f17855962e43eed21e2f297527b9c17d5d +size 198025308 diff --git a/run-81q5vyg9/checkpoint-1147/optimizer.pt b/run-81q5vyg9/checkpoint-1147/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6e6a4e575869b12dc3ca8d6b24cc583ecb51257 --- /dev/null +++ b/run-81q5vyg9/checkpoint-1147/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1366d4fbf5e864acce10be9b566db215373e62dc3459543b5c11672031a0a7bb +size 395900602 diff --git a/run-81q5vyg9/checkpoint-1147/rng_state.pth b/run-81q5vyg9/checkpoint-1147/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..43b1a3175dffb3289ba56a1e7f78b36ca1615834 --- /dev/null +++ b/run-81q5vyg9/checkpoint-1147/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf2d43d63184b1920f250efdd6f38efa027691f238090c0a0b0f43317419a2de +size 14244 diff --git a/run-81q5vyg9/checkpoint-1147/scheduler.pt b/run-81q5vyg9/checkpoint-1147/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..80249580488af390ca04976966a3d2f555de4154 --- /dev/null +++ b/run-81q5vyg9/checkpoint-1147/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cef36f4024fe701948b3e39f57fb676ae3c680368e98031d1edac0135d482c6 +size 1064 diff --git a/run-81q5vyg9/checkpoint-1147/trainer_state.json b/run-81q5vyg9/checkpoint-1147/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..97995e3b5b84da76a0adb1cfe7babc8151d38d9c --- /dev/null +++ b/run-81q5vyg9/checkpoint-1147/trainer_state.json @@ -0,0 +1,534 @@ +{ + "best_metric": 0.9196230598669624, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-81q5vyg9/checkpoint-1147", + "epoch": 26.988235294117647, + "eval_steps": 500, + "global_step": 1147, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.605246565841852e-05, + "loss": 1.2925, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8783259423503326, + "eval_loss": 0.9201260209083557, + "eval_runtime": 6.3495, + "eval_samples_per_second": 568.235, + "eval_steps_per_second": 8.977, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 7.210493131683704e-05, + "loss": 0.9132, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00010815739697525557, + "loss": 0.8265, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8196706175804138, + "eval_runtime": 6.4546, + "eval_samples_per_second": 558.985, + "eval_steps_per_second": 8.831, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001442098626336741, + "loss": 0.8073, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8966186252771619, + "eval_loss": 0.8376795649528503, + "eval_runtime": 6.6742, + "eval_samples_per_second": 540.59, + "eval_steps_per_second": 8.54, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001802623282920926, + "loss": 0.7993, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00021631479395051113, + "loss": 0.7823, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8907982261640798, + "eval_loss": 0.8548955917358398, + "eval_runtime": 6.7656, + "eval_samples_per_second": 533.286, + "eval_steps_per_second": 8.425, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00025236725960892965, + "loss": 0.7747, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0002884197252673482, + "loss": 0.7782, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8149077892303467, + "eval_runtime": 6.8785, + "eval_samples_per_second": 524.534, + "eval_steps_per_second": 8.287, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0003244721909257667, + "loss": 0.7704, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9032705099778271, + "eval_loss": 0.8378737568855286, + "eval_runtime": 6.8267, + "eval_samples_per_second": 528.515, + "eval_steps_per_second": 8.35, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0003605246565841852, + "loss": 0.7725, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00039657712224260374, + "loss": 0.7654, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8963414634146342, + "eval_loss": 0.835092306137085, + "eval_runtime": 6.8402, + "eval_samples_per_second": 527.469, + "eval_steps_per_second": 8.333, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00043262958790102226, + "loss": 0.7734, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0004686820535594408, + "loss": 0.7766, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8694567627494457, + "eval_loss": 0.8712729811668396, + "eval_runtime": 6.7988, + "eval_samples_per_second": 530.681, + "eval_steps_per_second": 8.384, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0005047345192178593, + "loss": 0.7713, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8650221729490022, + "eval_loss": 0.8875982165336609, + "eval_runtime": 6.5309, + "eval_samples_per_second": 552.448, + "eval_steps_per_second": 8.728, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0005407869848762778, + "loss": 0.7905, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0005768394505346964, + "loss": 0.7821, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8464523281596452, + "eval_loss": 0.9217314124107361, + "eval_runtime": 6.6594, + "eval_samples_per_second": 541.79, + "eval_steps_per_second": 8.559, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0005814008562398845, + "loss": 0.7868, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.897450110864745, + "eval_loss": 0.8474327921867371, + "eval_runtime": 6.946, + "eval_samples_per_second": 519.438, + "eval_steps_per_second": 8.206, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0005777063925365854, + "loss": 0.7847, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.000571304914389502, + "loss": 0.7746, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.86529933481153, + "eval_loss": 0.9115758538246155, + "eval_runtime": 6.4763, + "eval_samples_per_second": 557.11, + "eval_steps_per_second": 8.801, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0005622569037647989, + "loss": 0.7812, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0005506478473984909, + "loss": 0.7699, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8738913525498891, + "eval_loss": 0.8961396217346191, + "eval_runtime": 6.9288, + "eval_samples_per_second": 520.727, + "eval_steps_per_second": 8.227, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0005365874291070688, + "loss": 0.763, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8664079822616408, + "eval_loss": 0.8935708999633789, + "eval_runtime": 6.4571, + "eval_samples_per_second": 558.764, + "eval_steps_per_second": 8.827, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0005202084934810826, + "loss": 0.7679, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.000501665790752836, + "loss": 0.7616, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8486696230598669, + "eval_loss": 0.8904473781585693, + "eval_runtime": 6.7448, + "eval_samples_per_second": 534.934, + "eval_steps_per_second": 8.451, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00048113451469683683, + "loss": 0.7625, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0004588086473771035, + "loss": 0.7545, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8766629711751663, + "eval_loss": 0.861429750919342, + "eval_runtime": 6.7071, + "eval_samples_per_second": 537.94, + "eval_steps_per_second": 8.498, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00043489912638036335, + "loss": 0.7515, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8988359201773836, + "eval_loss": 0.8365984559059143, + "eval_runtime": 6.5819, + "eval_samples_per_second": 548.171, + "eval_steps_per_second": 8.66, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0004096318518513516, + "loss": 0.7429, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0003832455521599897, + "loss": 0.7443, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.8259763121604919, + "eval_runtime": 6.5295, + "eval_samples_per_second": 552.565, + "eval_steps_per_second": 8.73, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0003559895283658896, + "loss": 0.7311, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00032812129879076205, + "loss": 0.7358, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8293570280075073, + "eval_runtime": 6.4095, + "eval_samples_per_second": 562.913, + "eval_steps_per_second": 8.893, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0002999041659531051, + "loss": 0.7411, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8982815964523282, + "eval_loss": 0.8364214897155762, + "eval_runtime": 6.8593, + "eval_samples_per_second": 526.005, + "eval_steps_per_second": 8.31, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00027160472885307864, + "loss": 0.7235, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00024349036411180726, + "loss": 0.7217, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9088137472283814, + "eval_loss": 0.8243539333343506, + "eval_runtime": 6.9055, + "eval_samples_per_second": 522.48, + "eval_steps_per_second": 8.254, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00021582669976362288, + "loss": 0.7101, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9065964523281597, + "eval_loss": 0.8327736854553223, + "eval_runtime": 6.7681, + "eval_samples_per_second": 533.09, + "eval_steps_per_second": 8.422, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0001888751055691747, + "loss": 0.7156, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00016289022356124095, + "loss": 0.7139, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9088137472283814, + "eval_loss": 0.821331262588501, + "eval_runtime": 6.4382, + "eval_samples_per_second": 560.406, + "eval_steps_per_second": 8.853, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.0001381175621549552, + "loss": 0.7117, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.00011479117655359454, + "loss": 0.703, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8202525973320007, + "eval_runtime": 6.7543, + "eval_samples_per_second": 534.178, + "eval_steps_per_second": 8.439, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 9.313145736574565e-05, + "loss": 0.7015, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8136817812919617, + "eval_runtime": 6.8471, + "eval_samples_per_second": 526.939, + "eval_steps_per_second": 8.325, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 7.334304832727113e-05, + "loss": 0.6987, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 5.5612912801697694e-05, + "loss": 0.6958, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8163003921508789, + "eval_runtime": 6.3243, + "eval_samples_per_second": 570.502, + "eval_steps_per_second": 9.013, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 4.010856732697444e-05, + "loss": 0.6986, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 2.697649889827129e-05, + "loss": 0.6977, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8098257780075073, + "eval_runtime": 6.9173, + "eval_samples_per_second": 521.592, + "eval_steps_per_second": 8.24, + "step": 1147 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0005823859837129146, + "metric": "eval/loss", + "warmup_ratio": 0.33282409043034067 + } +} diff --git a/run-81q5vyg9/checkpoint-1147/training_args.bin b/run-81q5vyg9/checkpoint-1147/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..91fd657384d99b9236221114cb70fac9861e8332 --- /dev/null +++ b/run-81q5vyg9/checkpoint-1147/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bde85a19065b2994924cf8b6cb2f154caea6aebd1877acc52e24434e1e5e8f9 +size 4792 diff --git a/run-81q5vyg9/checkpoint-1260/model.safetensors b/run-81q5vyg9/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7ebd5c4f6b68f8423b8fa338719302b1a6e4552c --- /dev/null +++ b/run-81q5vyg9/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e435cd19ad76e1ffdf53b0446ce922b4870fb23711277eb94fdc988636dc5c +size 198025308 diff --git a/run-81q5vyg9/checkpoint-1260/optimizer.pt b/run-81q5vyg9/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e230187fc7bbc7e2caf86ea0d9a5ec900da80ecc --- /dev/null +++ b/run-81q5vyg9/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8dd025c78d8dd43e3bb0b4c696ce04da02a8c49a18ed028d832440a0c60577c +size 395900602 diff --git a/run-81q5vyg9/checkpoint-1260/rng_state.pth b/run-81q5vyg9/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-81q5vyg9/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-81q5vyg9/checkpoint-1260/scheduler.pt b/run-81q5vyg9/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a36583c4f3c0a8c206ef320b03f202c5aea2a058 --- /dev/null +++ b/run-81q5vyg9/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44adc042e59e47a23ec20e4fbc73b3dd3e2aebe38fa219bf416eacd103a8bc57 +size 1064 diff --git a/run-81q5vyg9/checkpoint-1260/trainer_state.json b/run-81q5vyg9/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bf7df963cac5dcd2380e437e3a69204f1f39a1a1 --- /dev/null +++ b/run-81q5vyg9/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9196230598669624, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-81q5vyg9/checkpoint-1147", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.605246565841852e-05, + "loss": 1.2925, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8783259423503326, + "eval_loss": 0.9201260209083557, + "eval_runtime": 6.3495, + "eval_samples_per_second": 568.235, + "eval_steps_per_second": 8.977, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 7.210493131683704e-05, + "loss": 0.9132, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00010815739697525557, + "loss": 0.8265, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8196706175804138, + "eval_runtime": 6.4546, + "eval_samples_per_second": 558.985, + "eval_steps_per_second": 8.831, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001442098626336741, + "loss": 0.8073, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8966186252771619, + "eval_loss": 0.8376795649528503, + "eval_runtime": 6.6742, + "eval_samples_per_second": 540.59, + "eval_steps_per_second": 8.54, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001802623282920926, + "loss": 0.7993, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00021631479395051113, + "loss": 0.7823, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8907982261640798, + "eval_loss": 0.8548955917358398, + "eval_runtime": 6.7656, + "eval_samples_per_second": 533.286, + "eval_steps_per_second": 8.425, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00025236725960892965, + "loss": 0.7747, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0002884197252673482, + "loss": 0.7782, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8149077892303467, + "eval_runtime": 6.8785, + "eval_samples_per_second": 524.534, + "eval_steps_per_second": 8.287, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0003244721909257667, + "loss": 0.7704, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9032705099778271, + "eval_loss": 0.8378737568855286, + "eval_runtime": 6.8267, + "eval_samples_per_second": 528.515, + "eval_steps_per_second": 8.35, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0003605246565841852, + "loss": 0.7725, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00039657712224260374, + "loss": 0.7654, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8963414634146342, + "eval_loss": 0.835092306137085, + "eval_runtime": 6.8402, + "eval_samples_per_second": 527.469, + "eval_steps_per_second": 8.333, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00043262958790102226, + "loss": 0.7734, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0004686820535594408, + "loss": 0.7766, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8694567627494457, + "eval_loss": 0.8712729811668396, + "eval_runtime": 6.7988, + "eval_samples_per_second": 530.681, + "eval_steps_per_second": 8.384, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0005047345192178593, + "loss": 0.7713, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8650221729490022, + "eval_loss": 0.8875982165336609, + "eval_runtime": 6.5309, + "eval_samples_per_second": 552.448, + "eval_steps_per_second": 8.728, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0005407869848762778, + "loss": 0.7905, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0005768394505346964, + "loss": 0.7821, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8464523281596452, + "eval_loss": 0.9217314124107361, + "eval_runtime": 6.6594, + "eval_samples_per_second": 541.79, + "eval_steps_per_second": 8.559, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0005814008562398845, + "loss": 0.7868, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.897450110864745, + "eval_loss": 0.8474327921867371, + "eval_runtime": 6.946, + "eval_samples_per_second": 519.438, + "eval_steps_per_second": 8.206, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0005777063925365854, + "loss": 0.7847, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.000571304914389502, + "loss": 0.7746, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.86529933481153, + "eval_loss": 0.9115758538246155, + "eval_runtime": 6.4763, + "eval_samples_per_second": 557.11, + "eval_steps_per_second": 8.801, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0005622569037647989, + "loss": 0.7812, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0005506478473984909, + "loss": 0.7699, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8738913525498891, + "eval_loss": 0.8961396217346191, + "eval_runtime": 6.9288, + "eval_samples_per_second": 520.727, + "eval_steps_per_second": 8.227, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0005365874291070688, + "loss": 0.763, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8664079822616408, + "eval_loss": 0.8935708999633789, + "eval_runtime": 6.4571, + "eval_samples_per_second": 558.764, + "eval_steps_per_second": 8.827, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0005202084934810826, + "loss": 0.7679, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.000501665790752836, + "loss": 0.7616, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8486696230598669, + "eval_loss": 0.8904473781585693, + "eval_runtime": 6.7448, + "eval_samples_per_second": 534.934, + "eval_steps_per_second": 8.451, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00048113451469683683, + "loss": 0.7625, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0004588086473771035, + "loss": 0.7545, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8766629711751663, + "eval_loss": 0.861429750919342, + "eval_runtime": 6.7071, + "eval_samples_per_second": 537.94, + "eval_steps_per_second": 8.498, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00043489912638036335, + "loss": 0.7515, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8988359201773836, + "eval_loss": 0.8365984559059143, + "eval_runtime": 6.5819, + "eval_samples_per_second": 548.171, + "eval_steps_per_second": 8.66, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0004096318518513516, + "loss": 0.7429, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0003832455521599897, + "loss": 0.7443, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.8259763121604919, + "eval_runtime": 6.5295, + "eval_samples_per_second": 552.565, + "eval_steps_per_second": 8.73, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0003559895283658896, + "loss": 0.7311, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00032812129879076205, + "loss": 0.7358, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8293570280075073, + "eval_runtime": 6.4095, + "eval_samples_per_second": 562.913, + "eval_steps_per_second": 8.893, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0002999041659531051, + "loss": 0.7411, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8982815964523282, + "eval_loss": 0.8364214897155762, + "eval_runtime": 6.8593, + "eval_samples_per_second": 526.005, + "eval_steps_per_second": 8.31, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00027160472885307864, + "loss": 0.7235, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00024349036411180726, + "loss": 0.7217, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9088137472283814, + "eval_loss": 0.8243539333343506, + "eval_runtime": 6.9055, + "eval_samples_per_second": 522.48, + "eval_steps_per_second": 8.254, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00021582669976362288, + "loss": 0.7101, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9065964523281597, + "eval_loss": 0.8327736854553223, + "eval_runtime": 6.7681, + "eval_samples_per_second": 533.09, + "eval_steps_per_second": 8.422, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0001888751055691747, + "loss": 0.7156, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00016289022356124095, + "loss": 0.7139, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9088137472283814, + "eval_loss": 0.821331262588501, + "eval_runtime": 6.4382, + "eval_samples_per_second": 560.406, + "eval_steps_per_second": 8.853, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.0001381175621549552, + "loss": 0.7117, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.00011479117655359454, + "loss": 0.703, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8202525973320007, + "eval_runtime": 6.7543, + "eval_samples_per_second": 534.178, + "eval_steps_per_second": 8.439, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 9.313145736574565e-05, + "loss": 0.7015, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8136817812919617, + "eval_runtime": 6.8471, + "eval_samples_per_second": 526.939, + "eval_steps_per_second": 8.325, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 7.334304832727113e-05, + "loss": 0.6987, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 5.5612912801697694e-05, + "loss": 0.6958, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8163003921508789, + "eval_runtime": 6.3243, + "eval_samples_per_second": 570.502, + "eval_steps_per_second": 9.013, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 4.010856732697444e-05, + "loss": 0.6986, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 2.697649889827129e-05, + "loss": 0.6977, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8098257780075073, + "eval_runtime": 6.9173, + "eval_samples_per_second": 521.592, + "eval_steps_per_second": 8.24, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.6340780940527024e-05, + "loss": 0.6968, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8173216581344604, + "eval_runtime": 6.8115, + "eval_samples_per_second": 529.693, + "eval_steps_per_second": 8.368, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 8.301901047212941e-06, + "loss": 0.6892, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 2.9358115609818017e-06, + "loss": 0.6924, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8124846816062927, + "eval_runtime": 6.6089, + "eval_samples_per_second": 545.933, + "eval_steps_per_second": 8.625, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 2.9321196643497514e-07, + "loss": 0.694, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8077273368835449, + "eval_runtime": 6.9478, + "eval_samples_per_second": 519.302, + "eval_steps_per_second": 8.204, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0005823859837129146, + "metric": "eval/loss", + "warmup_ratio": 0.33282409043034067 + } +} diff --git a/run-81q5vyg9/checkpoint-1260/training_args.bin b/run-81q5vyg9/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..91fd657384d99b9236221114cb70fac9861e8332 --- /dev/null +++ b/run-81q5vyg9/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bde85a19065b2994924cf8b6cb2f154caea6aebd1877acc52e24434e1e5e8f9 +size 4792 diff --git a/run-82i3twk4/checkpoint-1147/model.safetensors b/run-82i3twk4/checkpoint-1147/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a5f4098d6ffc651a37a004b3b2a41296327d9cc --- /dev/null +++ b/run-82i3twk4/checkpoint-1147/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00f43ad3502300928b60284cda4e8da45659494ce4b49891714939499cf87425 +size 198025308 diff --git a/run-82i3twk4/checkpoint-1147/optimizer.pt b/run-82i3twk4/checkpoint-1147/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e54d742ea311fd574a7293d962fe53e1fe6abdf4 --- /dev/null +++ b/run-82i3twk4/checkpoint-1147/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f319fc4a82ab676057019297f3edbf72d390655beec568f900013ae0fa2cd06 +size 395900602 diff --git a/run-82i3twk4/checkpoint-1147/rng_state.pth b/run-82i3twk4/checkpoint-1147/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..43b1a3175dffb3289ba56a1e7f78b36ca1615834 --- /dev/null +++ b/run-82i3twk4/checkpoint-1147/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf2d43d63184b1920f250efdd6f38efa027691f238090c0a0b0f43317419a2de +size 14244 diff --git a/run-82i3twk4/checkpoint-1147/scheduler.pt b/run-82i3twk4/checkpoint-1147/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f442f3d99ce5c69e38b7212d36fda46320d441ff --- /dev/null +++ b/run-82i3twk4/checkpoint-1147/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0efa270f31740840314f06dbc5920b78cf924187f9eb7c8ff8e081b63ff491 +size 1064 diff --git a/run-82i3twk4/checkpoint-1147/trainer_state.json b/run-82i3twk4/checkpoint-1147/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..230bfeab9b3f2df820dfdf482a43ed3466118ef9 --- /dev/null +++ b/run-82i3twk4/checkpoint-1147/trainer_state.json @@ -0,0 +1,534 @@ +{ + "best_metric": 0.9243348115299335, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-82i3twk4/checkpoint-1147", + "epoch": 26.988235294117647, + "eval_steps": 500, + "global_step": 1147, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.751379048878623e-05, + "loss": 1.3297, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8536585365853658, + "eval_loss": 0.9273630976676941, + "eval_runtime": 6.7978, + "eval_samples_per_second": 530.761, + "eval_steps_per_second": 8.385, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 5.502758097757246e-05, + "loss": 0.9373, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 8.25413714663587e-05, + "loss": 0.8456, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8239915370941162, + "eval_runtime": 7.0837, + "eval_samples_per_second": 509.34, + "eval_steps_per_second": 8.047, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00011005516195514492, + "loss": 0.8112, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8065152168273926, + "eval_runtime": 6.5665, + "eval_samples_per_second": 549.458, + "eval_steps_per_second": 8.68, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00013756895244393115, + "loss": 0.7978, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001650827429327174, + "loss": 0.7833, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.840787410736084, + "eval_runtime": 6.8356, + "eval_samples_per_second": 527.825, + "eval_steps_per_second": 8.339, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00019259653342150362, + "loss": 0.7785, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00022011032391028984, + "loss": 0.7733, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8073676824569702, + "eval_runtime": 6.9742, + "eval_samples_per_second": 517.338, + "eval_steps_per_second": 8.173, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0002476241143990761, + "loss": 0.7699, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9088137472283814, + "eval_loss": 0.8161069750785828, + "eval_runtime": 6.8158, + "eval_samples_per_second": 529.355, + "eval_steps_per_second": 8.363, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0002751379048878623, + "loss": 0.7704, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00030265169537664854, + "loss": 0.7619, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8210544586181641, + "eval_runtime": 6.9853, + "eval_samples_per_second": 516.516, + "eval_steps_per_second": 8.16, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0003301654858654348, + "loss": 0.7665, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0003502226504112582, + "loss": 0.7713, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8791574279379157, + "eval_loss": 0.8861579895019531, + "eval_runtime": 6.8637, + "eval_samples_per_second": 525.662, + "eval_steps_per_second": 8.305, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00034918230948071695, + "loss": 0.7556, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8927383592017738, + "eval_loss": 0.8371843099594116, + "eval_runtime": 6.9555, + "eval_samples_per_second": 518.725, + "eval_steps_per_second": 8.195, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0003467973460139984, + "loss": 0.7686, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00034308618541196935, + "loss": 0.7611, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8622505543237251, + "eval_loss": 0.9295992851257324, + "eval_runtime": 6.7777, + "eval_samples_per_second": 532.332, + "eval_steps_per_second": 8.41, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00033807749881476586, + "loss": 0.7569, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8713968957871396, + "eval_loss": 0.8740109205245972, + "eval_runtime": 6.9961, + "eval_samples_per_second": 515.715, + "eval_steps_per_second": 8.147, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00033180998159850503, + "loss": 0.749, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0003243320544282247, + "loss": 0.7422, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9027161862527716, + "eval_loss": 0.823365330696106, + "eval_runtime": 6.5282, + "eval_samples_per_second": 552.676, + "eval_steps_per_second": 8.731, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0003157014891766135, + "loss": 0.7426, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0003059849625985514, + "loss": 0.7382, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.818187952041626, + "eval_runtime": 6.8485, + "eval_samples_per_second": 526.831, + "eval_steps_per_second": 8.323, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00029525754120961954, + "loss": 0.7331, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9032705099778271, + "eval_loss": 0.8297945857048035, + "eval_runtime": 6.3715, + "eval_samples_per_second": 566.27, + "eval_steps_per_second": 8.946, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00028360210134822613, + "loss": 0.7392, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0002711086889017496, + "loss": 0.7229, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8885809312638581, + "eval_loss": 0.8516421318054199, + "eval_runtime": 6.8166, + "eval_samples_per_second": 529.297, + "eval_steps_per_second": 8.362, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0002578738236432323, + "loss": 0.7334, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00024399975355307728, + "loss": 0.7246, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8234266638755798, + "eval_runtime": 6.7052, + "eval_samples_per_second": 538.091, + "eval_steps_per_second": 8.501, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.000229593664886602, + "loss": 0.725, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.905210643015521, + "eval_loss": 0.8274281620979309, + "eval_runtime": 6.9439, + "eval_samples_per_second": 519.591, + "eval_steps_per_second": 8.209, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00021476685409019136, + "loss": 0.7172, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00019963386796353997, + "loss": 0.7122, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8116059899330139, + "eval_runtime": 7.0417, + "eval_samples_per_second": 512.379, + "eval_steps_per_second": 8.095, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00018431161871079166, + "loss": 0.711, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00016891848071738422, + "loss": 0.7098, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9113082039911308, + "eval_loss": 0.8198396563529968, + "eval_runtime": 7.0235, + "eval_samples_per_second": 513.708, + "eval_steps_per_second": 8.116, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00015357337603058807, + "loss": 0.7102, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9101995565410199, + "eval_loss": 0.8167212605476379, + "eval_runtime": 6.7872, + "eval_samples_per_second": 531.587, + "eval_steps_per_second": 8.398, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00013839485560900044, + "loss": 0.7065, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00012350018343894082, + "loss": 0.7002, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8118600845336914, + "eval_runtime": 6.5283, + "eval_samples_per_second": 552.667, + "eval_steps_per_second": 8.731, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00010900443059354884, + "loss": 0.6975, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8172564506530762, + "eval_runtime": 6.281, + "eval_samples_per_second": 574.43, + "eval_steps_per_second": 9.075, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 9.501958623356882e-05, + "loss": 0.7002, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 8.165369241792081e-05, + "loss": 0.6986, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8113449215888977, + "eval_runtime": 6.825, + "eval_samples_per_second": 528.641, + "eval_steps_per_second": 8.352, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 6.901000940821228e-05, + "loss": 0.6964, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 5.718621791575759e-05, + "loss": 0.694, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.810621976852417, + "eval_runtime": 6.8017, + "eval_samples_per_second": 530.452, + "eval_steps_per_second": 8.38, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 4.627366445426847e-05, + "loss": 0.6891, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8037423491477966, + "eval_runtime": 6.8293, + "eval_samples_per_second": 528.314, + "eval_steps_per_second": 8.346, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 3.6356655628359785e-05, + "loss": 0.6909, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.7511806809951213e-05, + "loss": 0.6878, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8058947920799255, + "eval_runtime": 6.9836, + "eval_samples_per_second": 516.642, + "eval_steps_per_second": 8.162, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 1.9807450234467095e-05, + "loss": 0.6883, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.3303107089676454e-05, + "loss": 0.6907, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9243348115299335, + "eval_loss": 0.8011569976806641, + "eval_runtime": 6.6519, + "eval_samples_per_second": 542.401, + "eval_steps_per_second": 8.569, + "step": 1147 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00035027171737647087, + "metric": "eval/loss", + "warmup_ratio": 0.26235806783531546 + } +} diff --git a/run-82i3twk4/checkpoint-1147/training_args.bin b/run-82i3twk4/checkpoint-1147/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fc13d242a24a3c1b98170ad0a0bc01b5a676a730 --- /dev/null +++ b/run-82i3twk4/checkpoint-1147/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc10cb6c92b1037ce4cfd48fd5f06c64e72fca2b38edde74e77441f214847f5 +size 4792 diff --git a/run-82i3twk4/checkpoint-1260/model.safetensors b/run-82i3twk4/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4117d3c544c7674c1f7dd5c755de0c6c1fe8fb5b --- /dev/null +++ b/run-82i3twk4/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d47213216f14d289c78124cb20637f3a06ce180459d54c1d20955f48fb59e91 +size 198025308 diff --git a/run-82i3twk4/checkpoint-1260/optimizer.pt b/run-82i3twk4/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bfab1001c0a6124e62a777f6b975fa8daf50b4ca --- /dev/null +++ b/run-82i3twk4/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52876b38f5c7359a0100e2b2b61db6954ad40105040b47fbb056a633789eb075 +size 395900602 diff --git a/run-82i3twk4/checkpoint-1260/rng_state.pth b/run-82i3twk4/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-82i3twk4/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-82i3twk4/checkpoint-1260/scheduler.pt b/run-82i3twk4/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..db132ebce31b47cf35a51825e302a8a3aff43c5e --- /dev/null +++ b/run-82i3twk4/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe9e9ec061417bbb12802f840132713278609f7598e474713222f2e672f5178 +size 1064 diff --git a/run-82i3twk4/checkpoint-1260/trainer_state.json b/run-82i3twk4/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9af1f3d69362841e995835f21f4772dfbc309516 --- /dev/null +++ b/run-82i3twk4/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9243348115299335, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-82i3twk4/checkpoint-1147", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.751379048878623e-05, + "loss": 1.3297, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8536585365853658, + "eval_loss": 0.9273630976676941, + "eval_runtime": 6.7978, + "eval_samples_per_second": 530.761, + "eval_steps_per_second": 8.385, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 5.502758097757246e-05, + "loss": 0.9373, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 8.25413714663587e-05, + "loss": 0.8456, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8239915370941162, + "eval_runtime": 7.0837, + "eval_samples_per_second": 509.34, + "eval_steps_per_second": 8.047, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00011005516195514492, + "loss": 0.8112, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8065152168273926, + "eval_runtime": 6.5665, + "eval_samples_per_second": 549.458, + "eval_steps_per_second": 8.68, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00013756895244393115, + "loss": 0.7978, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001650827429327174, + "loss": 0.7833, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.840787410736084, + "eval_runtime": 6.8356, + "eval_samples_per_second": 527.825, + "eval_steps_per_second": 8.339, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00019259653342150362, + "loss": 0.7785, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00022011032391028984, + "loss": 0.7733, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8073676824569702, + "eval_runtime": 6.9742, + "eval_samples_per_second": 517.338, + "eval_steps_per_second": 8.173, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0002476241143990761, + "loss": 0.7699, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9088137472283814, + "eval_loss": 0.8161069750785828, + "eval_runtime": 6.8158, + "eval_samples_per_second": 529.355, + "eval_steps_per_second": 8.363, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0002751379048878623, + "loss": 0.7704, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00030265169537664854, + "loss": 0.7619, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8210544586181641, + "eval_runtime": 6.9853, + "eval_samples_per_second": 516.516, + "eval_steps_per_second": 8.16, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0003301654858654348, + "loss": 0.7665, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0003502226504112582, + "loss": 0.7713, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8791574279379157, + "eval_loss": 0.8861579895019531, + "eval_runtime": 6.8637, + "eval_samples_per_second": 525.662, + "eval_steps_per_second": 8.305, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00034918230948071695, + "loss": 0.7556, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8927383592017738, + "eval_loss": 0.8371843099594116, + "eval_runtime": 6.9555, + "eval_samples_per_second": 518.725, + "eval_steps_per_second": 8.195, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0003467973460139984, + "loss": 0.7686, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00034308618541196935, + "loss": 0.7611, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8622505543237251, + "eval_loss": 0.9295992851257324, + "eval_runtime": 6.7777, + "eval_samples_per_second": 532.332, + "eval_steps_per_second": 8.41, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00033807749881476586, + "loss": 0.7569, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8713968957871396, + "eval_loss": 0.8740109205245972, + "eval_runtime": 6.9961, + "eval_samples_per_second": 515.715, + "eval_steps_per_second": 8.147, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00033180998159850503, + "loss": 0.749, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0003243320544282247, + "loss": 0.7422, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9027161862527716, + "eval_loss": 0.823365330696106, + "eval_runtime": 6.5282, + "eval_samples_per_second": 552.676, + "eval_steps_per_second": 8.731, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0003157014891766135, + "loss": 0.7426, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0003059849625985514, + "loss": 0.7382, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.818187952041626, + "eval_runtime": 6.8485, + "eval_samples_per_second": 526.831, + "eval_steps_per_second": 8.323, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00029525754120961954, + "loss": 0.7331, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9032705099778271, + "eval_loss": 0.8297945857048035, + "eval_runtime": 6.3715, + "eval_samples_per_second": 566.27, + "eval_steps_per_second": 8.946, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00028360210134822613, + "loss": 0.7392, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0002711086889017496, + "loss": 0.7229, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8885809312638581, + "eval_loss": 0.8516421318054199, + "eval_runtime": 6.8166, + "eval_samples_per_second": 529.297, + "eval_steps_per_second": 8.362, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0002578738236432323, + "loss": 0.7334, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00024399975355307728, + "loss": 0.7246, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8234266638755798, + "eval_runtime": 6.7052, + "eval_samples_per_second": 538.091, + "eval_steps_per_second": 8.501, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.000229593664886602, + "loss": 0.725, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.905210643015521, + "eval_loss": 0.8274281620979309, + "eval_runtime": 6.9439, + "eval_samples_per_second": 519.591, + "eval_steps_per_second": 8.209, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00021476685409019136, + "loss": 0.7172, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00019963386796353997, + "loss": 0.7122, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8116059899330139, + "eval_runtime": 7.0417, + "eval_samples_per_second": 512.379, + "eval_steps_per_second": 8.095, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00018431161871079166, + "loss": 0.711, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00016891848071738422, + "loss": 0.7098, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9113082039911308, + "eval_loss": 0.8198396563529968, + "eval_runtime": 7.0235, + "eval_samples_per_second": 513.708, + "eval_steps_per_second": 8.116, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00015357337603058807, + "loss": 0.7102, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9101995565410199, + "eval_loss": 0.8167212605476379, + "eval_runtime": 6.7872, + "eval_samples_per_second": 531.587, + "eval_steps_per_second": 8.398, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00013839485560900044, + "loss": 0.7065, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00012350018343894082, + "loss": 0.7002, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8118600845336914, + "eval_runtime": 6.5283, + "eval_samples_per_second": 552.667, + "eval_steps_per_second": 8.731, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00010900443059354884, + "loss": 0.6975, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8172564506530762, + "eval_runtime": 6.281, + "eval_samples_per_second": 574.43, + "eval_steps_per_second": 9.075, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 9.501958623356882e-05, + "loss": 0.7002, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 8.165369241792081e-05, + "loss": 0.6986, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8113449215888977, + "eval_runtime": 6.825, + "eval_samples_per_second": 528.641, + "eval_steps_per_second": 8.352, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 6.901000940821228e-05, + "loss": 0.6964, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 5.718621791575759e-05, + "loss": 0.694, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.810621976852417, + "eval_runtime": 6.8017, + "eval_samples_per_second": 530.452, + "eval_steps_per_second": 8.38, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 4.627366445426847e-05, + "loss": 0.6891, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8037423491477966, + "eval_runtime": 6.8293, + "eval_samples_per_second": 528.314, + "eval_steps_per_second": 8.346, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 3.6356655628359785e-05, + "loss": 0.6909, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.7511806809951213e-05, + "loss": 0.6878, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8058947920799255, + "eval_runtime": 6.9836, + "eval_samples_per_second": 516.642, + "eval_steps_per_second": 8.162, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 1.9807450234467095e-05, + "loss": 0.6883, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.3303107089676454e-05, + "loss": 0.6907, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9243348115299335, + "eval_loss": 0.8011569976806641, + "eval_runtime": 6.6519, + "eval_samples_per_second": 542.401, + "eval_steps_per_second": 8.569, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 8.049027675632386e-06, + "loss": 0.69, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9221175166297118, + "eval_loss": 0.8056154847145081, + "eval_runtime": 6.6157, + "eval_samples_per_second": 545.368, + "eval_steps_per_second": 8.616, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 4.085803188276178e-06, + "loss": 0.6837, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.4440521259303106e-06, + "loss": 0.6852, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.8095604181289673, + "eval_runtime": 6.9346, + "eval_samples_per_second": 520.288, + "eval_steps_per_second": 8.22, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 1.4418374139612735e-07, + "loss": 0.6875, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9232261640798226, + "eval_loss": 0.799712598323822, + "eval_runtime": 6.8613, + "eval_samples_per_second": 525.848, + "eval_steps_per_second": 8.307, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00035027171737647087, + "metric": "eval/loss", + "warmup_ratio": 0.26235806783531546 + } +} diff --git a/run-82i3twk4/checkpoint-1260/training_args.bin b/run-82i3twk4/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fc13d242a24a3c1b98170ad0a0bc01b5a676a730 --- /dev/null +++ b/run-82i3twk4/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc10cb6c92b1037ce4cfd48fd5f06c64e72fca2b38edde74e77441f214847f5 +size 4792 diff --git a/run-85eme8mb/checkpoint-488/model.safetensors b/run-85eme8mb/checkpoint-488/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e77caa1b8bf1d3c138933a2e31b2dcae8b1c1892 --- /dev/null +++ b/run-85eme8mb/checkpoint-488/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8abd1f6490f25602958ceffd858cc9f4fca5cb747e8982dd49d787d620e5882e +size 198025308 diff --git a/run-85eme8mb/checkpoint-488/optimizer.pt b/run-85eme8mb/checkpoint-488/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d1e1498f61d43ffacb3c78b941d6ad944cb8b48 --- /dev/null +++ b/run-85eme8mb/checkpoint-488/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3e7670c2bd3aecffe9e9e4bb963db39e42e17fcc686862e97b7e679f6cfbf30 +size 395900602 diff --git a/run-85eme8mb/checkpoint-488/rng_state.pth b/run-85eme8mb/checkpoint-488/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f4e242cd45cd82eec2c9f5f7a343995f3fc07291 --- /dev/null +++ b/run-85eme8mb/checkpoint-488/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a77e9678cdc4e4bb78c6028d260e7fc1b894ddf6c0f054c0c0d7c6e8bb4722d +size 14244 diff --git a/run-85eme8mb/checkpoint-488/scheduler.pt b/run-85eme8mb/checkpoint-488/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..89afe19de3a790591f68c418caa26354cb7664cf --- /dev/null +++ b/run-85eme8mb/checkpoint-488/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4734ab8af93875dea7c6ae6cccbe8c797b8a93ae9367a501a286a71943b1f20f +size 1064 diff --git a/run-85eme8mb/checkpoint-488/trainer_state.json b/run-85eme8mb/checkpoint-488/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..23127b04caec51538a7baea2a00eddf4ef41e8e5 --- /dev/null +++ b/run-85eme8mb/checkpoint-488/trainer_state.json @@ -0,0 +1,526 @@ +{ + "best_metric": 0.9161619622632412, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-85eme8mb/checkpoint-488", + "epoch": 22.96470588235294, + "eval_steps": 500, + "global_step": 488, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.769036313449624e-05, + "loss": 1.3196, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.1881415843963623, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.6239, + "eval_samples_per_second": 473.252, + "eval_steps_per_second": 3.804, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00013538072626899248, + "loss": 0.9538, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00020307108940348872, + "loss": 0.8354, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8791574279379157, + "eval_f1": 0.8819766414371885, + "eval_loss": 0.8821062445640564, + "eval_precision": 0.893492612268658, + "eval_recall": 0.8791574279379157, + "eval_runtime": 7.8214, + "eval_samples_per_second": 461.299, + "eval_steps_per_second": 3.708, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00027076145253798495, + "loss": 0.798, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9034991730273331, + "eval_loss": 0.8030994534492493, + "eval_precision": 0.9023741010083071, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.404, + "eval_samples_per_second": 429.321, + "eval_steps_per_second": 3.451, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003384518156724812, + "loss": 0.7953, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00040614217880697743, + "loss": 0.7782, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8968957871396895, + "eval_f1": 0.8830992760668389, + "eval_loss": 0.8315520286560059, + "eval_precision": 0.8887584055284756, + "eval_recall": 0.8968957871396895, + "eval_runtime": 7.9076, + "eval_samples_per_second": 456.269, + "eval_steps_per_second": 3.667, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00047383254194147367, + "loss": 0.7904, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.000494315024799922, + "loss": 0.7709, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.88470066518847, + "eval_f1": 0.8792197079417244, + "eval_loss": 0.8714499473571777, + "eval_precision": 0.8915081358014645, + "eval_recall": 0.88470066518847, + "eval_runtime": 8.0085, + "eval_samples_per_second": 450.52, + "eval_steps_per_second": 3.621, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0004925993337975992, + "loss": 0.7758, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8722283813747228, + "eval_f1": 0.8782192456815272, + "eval_loss": 0.8707566261291504, + "eval_precision": 0.8904099404839843, + "eval_recall": 0.8722283813747228, + "eval_runtime": 7.4038, + "eval_samples_per_second": 487.315, + "eval_steps_per_second": 3.917, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0004894550418990474, + "loss": 0.7721, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0004849004634256006, + "loss": 0.7618, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8317627494456763, + "eval_f1": 0.8416468620880149, + "eval_loss": 0.925355851650238, + "eval_precision": 0.8815136201744496, + "eval_recall": 0.8317627494456763, + "eval_runtime": 7.2699, + "eval_samples_per_second": 496.295, + "eval_steps_per_second": 3.989, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00047896212708909395, + "loss": 0.7589, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.000471674621472065, + "loss": 0.7485, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.7380820399113082, + "eval_f1": 0.7753902012346019, + "eval_loss": 1.078928828239441, + "eval_precision": 0.8774665029119735, + "eval_recall": 0.7380820399113082, + "eval_runtime": 7.8198, + "eval_samples_per_second": 461.393, + "eval_steps_per_second": 3.709, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0004630803935622293, + "loss": 0.7462, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8877494456762749, + "eval_f1": 0.865006032310344, + "eval_loss": 0.8699197173118591, + "eval_precision": 0.874847434559607, + "eval_recall": 0.8877494456762749, + "eval_runtime": 7.6224, + "eval_samples_per_second": 473.339, + "eval_steps_per_second": 3.805, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00045322950151469314, + "loss": 0.7555, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0004421793230819692, + "loss": 0.7517, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8880266075388027, + "eval_f1": 0.8853801970811287, + "eval_loss": 0.8529537320137024, + "eval_precision": 0.8853901936784109, + "eval_recall": 0.8880266075388027, + "eval_runtime": 7.3755, + "eval_samples_per_second": 489.189, + "eval_steps_per_second": 3.932, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00042999422141008287, + "loss": 0.7337, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8841463414634146, + "eval_f1": 0.8772099218363365, + "eval_loss": 0.8580551743507385, + "eval_precision": 0.8865638918099502, + "eval_recall": 0.8841463414634146, + "eval_runtime": 8.036, + "eval_samples_per_second": 448.98, + "eval_steps_per_second": 3.609, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0004167451701473798, + "loss": 0.747, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00040250934004963646, + "loss": 0.7287, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8458980044345898, + "eval_f1": 0.8639919982519135, + "eval_loss": 0.9147387742996216, + "eval_precision": 0.8936918385327249, + "eval_recall": 0.8458980044345898, + "eval_runtime": 7.8732, + "eval_samples_per_second": 458.264, + "eval_steps_per_second": 3.683, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00038736964948934364, + "loss": 0.7244, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0003714142814872772, + "loss": 0.7298, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9038248337028825, + "eval_f1": 0.8966937119583347, + "eval_loss": 0.8240708112716675, + "eval_precision": 0.8931256835906773, + "eval_recall": 0.9038248337028825, + "eval_runtime": 7.7967, + "eval_samples_per_second": 462.757, + "eval_steps_per_second": 3.72, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.000354736170079467, + "loss": 0.7216, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.893569844789357, + "eval_f1": 0.8811592190788098, + "eval_loss": 0.8562313914299011, + "eval_precision": 0.886463349923117, + "eval_recall": 0.893569844789357, + "eval_runtime": 7.6829, + "eval_samples_per_second": 469.613, + "eval_steps_per_second": 3.775, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0003374324590112839, + "loss": 0.7206, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003196039359115467, + "loss": 0.7157, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8999445676274944, + "eval_f1": 0.8928846076917938, + "eval_loss": 0.832081139087677, + "eval_precision": 0.8890446765760062, + "eval_recall": 0.8999445676274944, + "eval_runtime": 7.6948, + "eval_samples_per_second": 468.886, + "eval_steps_per_second": 3.769, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0003013544452423761, + "loss": 0.7163, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00028279028344414017, + "loss": 0.7154, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.8984330912662029, + "eval_loss": 0.8270737528800964, + "eval_precision": 0.9035195234145866, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.2059, + "eval_samples_per_second": 500.699, + "eval_steps_per_second": 4.024, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0002640195797985451, + "loss": 0.7066, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9032705099778271, + "eval_f1": 0.8940901600559393, + "eval_loss": 0.8283047080039978, + "eval_precision": 0.8919570440109909, + "eval_recall": 0.9032705099778271, + "eval_runtime": 7.6095, + "eval_samples_per_second": 474.144, + "eval_steps_per_second": 3.811, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00024515166661611386, + "loss": 0.7085, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.0002262964424164719, + "loss": 0.704, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.8988359201773836, + "eval_f1": 0.8989473662850057, + "eval_loss": 0.8299522399902344, + "eval_precision": 0.901808161543132, + "eval_recall": 0.8988359201773836, + "eval_runtime": 7.9036, + "eval_samples_per_second": 456.503, + "eval_steps_per_second": 3.669, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00020756373181067783, + "loss": 0.7018, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00018906264581404342, + "loss": 0.7018, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.9050414647372181, + "eval_loss": 0.8178835511207581, + "eval_precision": 0.9043926853417746, + "eval_recall": 0.9107538802660754, + "eval_runtime": 7.8957, + "eval_samples_per_second": 456.959, + "eval_steps_per_second": 3.673, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.00017090094631538153, + "loss": 0.697, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.905210643015521, + "eval_f1": 0.902696568607567, + "eval_loss": 0.828353762626648, + "eval_precision": 0.9010232411022195, + "eval_recall": 0.905210643015521, + "eval_runtime": 7.6384, + "eval_samples_per_second": 472.35, + "eval_steps_per_second": 3.797, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.0001531844184044096, + "loss": 0.6977, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.00013601625421326736, + "loss": 0.697, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9082103323961574, + "eval_loss": 0.8138181567192078, + "eval_precision": 0.9106225087976652, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.3408, + "eval_samples_per_second": 491.503, + "eval_steps_per_second": 3.951, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.00011949645186103925, + "loss": 0.694, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9096452328159645, + "eval_f1": 0.9035452829487767, + "eval_loss": 0.8190215826034546, + "eval_precision": 0.9009555723686802, + "eval_recall": 0.9096452328159645, + "eval_runtime": 8.01, + "eval_samples_per_second": 450.438, + "eval_steps_per_second": 3.62, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00010372123300220627, + "loss": 0.6941, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 8.878248237158741e-05, + "loss": 0.6937, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9215631929046563, + "eval_f1": 0.9161619622632412, + "eval_loss": 0.8059842586517334, + "eval_precision": 0.9147348942136364, + "eval_recall": 0.9215631929046563, + "eval_runtime": 8.2025, + "eval_samples_per_second": 439.864, + "eval_steps_per_second": 3.535, + "step": 488 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.3428059633871926, + "learning_rate": 0.0004946603459828571, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-85eme8mb/checkpoint-488/training_args.bin b/run-85eme8mb/checkpoint-488/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2c3bda2a3aae62e9ca73bcbeaece22ffe82ba505 --- /dev/null +++ b/run-85eme8mb/checkpoint-488/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ad040f98f456173cfce6406a5f0c6b8bd78c3ba556d6de8a462aab8712f35c6 +size 4792 diff --git a/run-85eme8mb/checkpoint-630/model.safetensors b/run-85eme8mb/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d68b87623c2aa53b4e639e5af5f27d2d0e1c0d7f --- /dev/null +++ b/run-85eme8mb/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84c1ba5d51a45f45b23e5a53c51059eeaa895af3742d849c7353d399c742995a +size 198025308 diff --git a/run-85eme8mb/checkpoint-630/optimizer.pt b/run-85eme8mb/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ecfd1b0cfc3abd33b2824967888d503e0addb4cb --- /dev/null +++ b/run-85eme8mb/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:343be75b7b5b0edafc07f98b2eaa8e93b419167acca685326a2fec4a8a36a98b +size 395900602 diff --git a/run-85eme8mb/checkpoint-630/rng_state.pth b/run-85eme8mb/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-85eme8mb/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-85eme8mb/checkpoint-630/scheduler.pt b/run-85eme8mb/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..14c8cf1189aba51e365eb4579e79c1b69195a280 --- /dev/null +++ b/run-85eme8mb/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f2cd4c7cf44b7f3f94b8a10968f57ebc3b3aa47e248b19adb4338002980cbd4 +size 1064 diff --git a/run-85eme8mb/checkpoint-630/trainer_state.json b/run-85eme8mb/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..35cfeae2fab1bebefb27becae5c29b80fc3162dc --- /dev/null +++ b/run-85eme8mb/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9161619622632412, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-85eme8mb/checkpoint-488", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.769036313449624e-05, + "loss": 1.3196, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.1881415843963623, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.6239, + "eval_samples_per_second": 473.252, + "eval_steps_per_second": 3.804, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00013538072626899248, + "loss": 0.9538, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00020307108940348872, + "loss": 0.8354, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8791574279379157, + "eval_f1": 0.8819766414371885, + "eval_loss": 0.8821062445640564, + "eval_precision": 0.893492612268658, + "eval_recall": 0.8791574279379157, + "eval_runtime": 7.8214, + "eval_samples_per_second": 461.299, + "eval_steps_per_second": 3.708, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00027076145253798495, + "loss": 0.798, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9034991730273331, + "eval_loss": 0.8030994534492493, + "eval_precision": 0.9023741010083071, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.404, + "eval_samples_per_second": 429.321, + "eval_steps_per_second": 3.451, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003384518156724812, + "loss": 0.7953, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00040614217880697743, + "loss": 0.7782, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8968957871396895, + "eval_f1": 0.8830992760668389, + "eval_loss": 0.8315520286560059, + "eval_precision": 0.8887584055284756, + "eval_recall": 0.8968957871396895, + "eval_runtime": 7.9076, + "eval_samples_per_second": 456.269, + "eval_steps_per_second": 3.667, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00047383254194147367, + "loss": 0.7904, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.000494315024799922, + "loss": 0.7709, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.88470066518847, + "eval_f1": 0.8792197079417244, + "eval_loss": 0.8714499473571777, + "eval_precision": 0.8915081358014645, + "eval_recall": 0.88470066518847, + "eval_runtime": 8.0085, + "eval_samples_per_second": 450.52, + "eval_steps_per_second": 3.621, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0004925993337975992, + "loss": 0.7758, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8722283813747228, + "eval_f1": 0.8782192456815272, + "eval_loss": 0.8707566261291504, + "eval_precision": 0.8904099404839843, + "eval_recall": 0.8722283813747228, + "eval_runtime": 7.4038, + "eval_samples_per_second": 487.315, + "eval_steps_per_second": 3.917, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0004894550418990474, + "loss": 0.7721, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0004849004634256006, + "loss": 0.7618, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8317627494456763, + "eval_f1": 0.8416468620880149, + "eval_loss": 0.925355851650238, + "eval_precision": 0.8815136201744496, + "eval_recall": 0.8317627494456763, + "eval_runtime": 7.2699, + "eval_samples_per_second": 496.295, + "eval_steps_per_second": 3.989, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00047896212708909395, + "loss": 0.7589, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.000471674621472065, + "loss": 0.7485, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.7380820399113082, + "eval_f1": 0.7753902012346019, + "eval_loss": 1.078928828239441, + "eval_precision": 0.8774665029119735, + "eval_recall": 0.7380820399113082, + "eval_runtime": 7.8198, + "eval_samples_per_second": 461.393, + "eval_steps_per_second": 3.709, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0004630803935622293, + "loss": 0.7462, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8877494456762749, + "eval_f1": 0.865006032310344, + "eval_loss": 0.8699197173118591, + "eval_precision": 0.874847434559607, + "eval_recall": 0.8877494456762749, + "eval_runtime": 7.6224, + "eval_samples_per_second": 473.339, + "eval_steps_per_second": 3.805, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00045322950151469314, + "loss": 0.7555, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0004421793230819692, + "loss": 0.7517, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8880266075388027, + "eval_f1": 0.8853801970811287, + "eval_loss": 0.8529537320137024, + "eval_precision": 0.8853901936784109, + "eval_recall": 0.8880266075388027, + "eval_runtime": 7.3755, + "eval_samples_per_second": 489.189, + "eval_steps_per_second": 3.932, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00042999422141008287, + "loss": 0.7337, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8841463414634146, + "eval_f1": 0.8772099218363365, + "eval_loss": 0.8580551743507385, + "eval_precision": 0.8865638918099502, + "eval_recall": 0.8841463414634146, + "eval_runtime": 8.036, + "eval_samples_per_second": 448.98, + "eval_steps_per_second": 3.609, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0004167451701473798, + "loss": 0.747, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00040250934004963646, + "loss": 0.7287, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8458980044345898, + "eval_f1": 0.8639919982519135, + "eval_loss": 0.9147387742996216, + "eval_precision": 0.8936918385327249, + "eval_recall": 0.8458980044345898, + "eval_runtime": 7.8732, + "eval_samples_per_second": 458.264, + "eval_steps_per_second": 3.683, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00038736964948934364, + "loss": 0.7244, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0003714142814872772, + "loss": 0.7298, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9038248337028825, + "eval_f1": 0.8966937119583347, + "eval_loss": 0.8240708112716675, + "eval_precision": 0.8931256835906773, + "eval_recall": 0.9038248337028825, + "eval_runtime": 7.7967, + "eval_samples_per_second": 462.757, + "eval_steps_per_second": 3.72, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.000354736170079467, + "loss": 0.7216, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.893569844789357, + "eval_f1": 0.8811592190788098, + "eval_loss": 0.8562313914299011, + "eval_precision": 0.886463349923117, + "eval_recall": 0.893569844789357, + "eval_runtime": 7.6829, + "eval_samples_per_second": 469.613, + "eval_steps_per_second": 3.775, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0003374324590112839, + "loss": 0.7206, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003196039359115467, + "loss": 0.7157, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8999445676274944, + "eval_f1": 0.8928846076917938, + "eval_loss": 0.832081139087677, + "eval_precision": 0.8890446765760062, + "eval_recall": 0.8999445676274944, + "eval_runtime": 7.6948, + "eval_samples_per_second": 468.886, + "eval_steps_per_second": 3.769, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0003013544452423761, + "loss": 0.7163, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00028279028344414017, + "loss": 0.7154, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.8984330912662029, + "eval_loss": 0.8270737528800964, + "eval_precision": 0.9035195234145866, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.2059, + "eval_samples_per_second": 500.699, + "eval_steps_per_second": 4.024, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0002640195797985451, + "loss": 0.7066, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9032705099778271, + "eval_f1": 0.8940901600559393, + "eval_loss": 0.8283047080039978, + "eval_precision": 0.8919570440109909, + "eval_recall": 0.9032705099778271, + "eval_runtime": 7.6095, + "eval_samples_per_second": 474.144, + "eval_steps_per_second": 3.811, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00024515166661611386, + "loss": 0.7085, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.0002262964424164719, + "loss": 0.704, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.8988359201773836, + "eval_f1": 0.8989473662850057, + "eval_loss": 0.8299522399902344, + "eval_precision": 0.901808161543132, + "eval_recall": 0.8988359201773836, + "eval_runtime": 7.9036, + "eval_samples_per_second": 456.503, + "eval_steps_per_second": 3.669, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00020756373181067783, + "loss": 0.7018, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00018906264581404342, + "loss": 0.7018, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.9050414647372181, + "eval_loss": 0.8178835511207581, + "eval_precision": 0.9043926853417746, + "eval_recall": 0.9107538802660754, + "eval_runtime": 7.8957, + "eval_samples_per_second": 456.959, + "eval_steps_per_second": 3.673, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.00017090094631538153, + "loss": 0.697, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.905210643015521, + "eval_f1": 0.902696568607567, + "eval_loss": 0.828353762626648, + "eval_precision": 0.9010232411022195, + "eval_recall": 0.905210643015521, + "eval_runtime": 7.6384, + "eval_samples_per_second": 472.35, + "eval_steps_per_second": 3.797, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.0001531844184044096, + "loss": 0.6977, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.00013601625421326736, + "loss": 0.697, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9082103323961574, + "eval_loss": 0.8138181567192078, + "eval_precision": 0.9106225087976652, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.3408, + "eval_samples_per_second": 491.503, + "eval_steps_per_second": 3.951, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.00011949645186103925, + "loss": 0.694, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9096452328159645, + "eval_f1": 0.9035452829487767, + "eval_loss": 0.8190215826034546, + "eval_precision": 0.9009555723686802, + "eval_recall": 0.9096452328159645, + "eval_runtime": 8.01, + "eval_samples_per_second": 450.438, + "eval_steps_per_second": 3.62, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00010372123300220627, + "loss": 0.6941, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 8.878248237158741e-05, + "loss": 0.6937, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9215631929046563, + "eval_f1": 0.9161619622632412, + "eval_loss": 0.8059842586517334, + "eval_precision": 0.9147348942136364, + "eval_recall": 0.9215631929046563, + "eval_runtime": 8.2025, + "eval_samples_per_second": 439.864, + "eval_steps_per_second": 3.535, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 7.476721259021197e-05, + "loss": 0.69, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 6.175705734942856e-05, + "loss": 0.6898, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9089539935700177, + "eval_loss": 0.8168031573295593, + "eval_precision": 0.9084967133419092, + "eval_recall": 0.9118625277161863, + "eval_runtime": 7.9793, + "eval_samples_per_second": 452.172, + "eval_steps_per_second": 3.634, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 4.982779592526191e-05, + "loss": 0.6908, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.914573347210123, + "eval_loss": 0.8118212223052979, + "eval_precision": 0.912328945102129, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.7255, + "eval_samples_per_second": 467.023, + "eval_steps_per_second": 3.754, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 3.904891179254372e-05, + "loss": 0.6867, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 2.9483187909725668e-05, + "loss": 0.6875, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9139574860984034, + "eval_loss": 0.8082780838012695, + "eval_precision": 0.9125348242950756, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.7817, + "eval_samples_per_second": 463.649, + "eval_steps_per_second": 3.727, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 2.1186341031687038e-05, + "loss": 0.6876, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 1.4206697180528841e-05, + "loss": 0.6866, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9086687281550373, + "eval_loss": 0.8183485269546509, + "eval_precision": 0.9066350602654811, + "eval_recall": 0.9118625277161863, + "eval_runtime": 7.5088, + "eval_samples_per_second": 480.501, + "eval_steps_per_second": 3.862, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 8.584910164615561e-06, + "loss": 0.6862, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9155313625819885, + "eval_loss": 0.8039478659629822, + "eval_precision": 0.9128500085659037, + "eval_recall": 0.9190687361419069, + "eval_runtime": 7.8039, + "eval_samples_per_second": 462.331, + "eval_steps_per_second": 3.716, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 4.353724785384726e-06, + "loss": 0.6881, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.537786111156398e-06, + "loss": 0.6877, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9117548608063331, + "eval_loss": 0.8037508130073547, + "eval_precision": 0.9104908962385095, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.8957, + "eval_samples_per_second": 456.957, + "eval_steps_per_second": 3.673, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 1.5349592885117922e-07, + "loss": 0.6831, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9137385560327395, + "eval_loss": 0.811510443687439, + "eval_precision": 0.9123968653892656, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.9152, + "eval_samples_per_second": 455.83, + "eval_steps_per_second": 3.664, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.3428059633871926, + "learning_rate": 0.0004946603459828571, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-85eme8mb/checkpoint-630/training_args.bin b/run-85eme8mb/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2c3bda2a3aae62e9ca73bcbeaece22ffe82ba505 --- /dev/null +++ b/run-85eme8mb/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ad040f98f456173cfce6406a5f0c6b8bd78c3ba556d6de8a462aab8712f35c6 +size 4792 diff --git a/run-8rke1nw3/checkpoint-1232/model.safetensors b/run-8rke1nw3/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a0a2fdf074b8e484d26b644709515cfc1bb152b --- /dev/null +++ b/run-8rke1nw3/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c7aa4d81a5d99ea0be5b56bcef44dfb2316c39bda95eda6c6f5dd3cc4e70520 +size 198025308 diff --git a/run-8rke1nw3/checkpoint-1232/optimizer.pt b/run-8rke1nw3/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ade6720144a6c8364c45446f65ed0c0239db734d --- /dev/null +++ b/run-8rke1nw3/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d31609931144f287c02ce644af7657d9784c10e4ac48fc8df7cb321e98cc8e63 +size 395900602 diff --git a/run-8rke1nw3/checkpoint-1232/rng_state.pth b/run-8rke1nw3/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-8rke1nw3/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-8rke1nw3/checkpoint-1232/scheduler.pt b/run-8rke1nw3/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c077badc16a6cd6bf211153ce9c7c8f469479c32 --- /dev/null +++ b/run-8rke1nw3/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b38ca4f98f8dc19a72c15dedc6dbcdc378e6171ee64bb7a6c8e0baa5a2515dae +size 1064 diff --git a/run-8rke1nw3/checkpoint-1232/trainer_state.json b/run-8rke1nw3/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..68adcbd02b0f2057baa073781392fd18dee8b874 --- /dev/null +++ b/run-8rke1nw3/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9176829268292683, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-8rke1nw3/checkpoint-1147", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.33466685926141e-05, + "loss": 1.2704, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8766629711751663, + "eval_loss": 0.9036412835121155, + "eval_runtime": 6.7285, + "eval_samples_per_second": 536.23, + "eval_steps_per_second": 8.471, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 8.66933371852282e-05, + "loss": 0.9138, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00013004000577784228, + "loss": 0.8229, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9013303769401331, + "eval_loss": 0.8313962817192078, + "eval_runtime": 6.7262, + "eval_samples_per_second": 536.408, + "eval_steps_per_second": 8.474, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001733866743704564, + "loss": 0.8059, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8248089551925659, + "eval_runtime": 6.6651, + "eval_samples_per_second": 541.329, + "eval_steps_per_second": 8.552, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0002167333429630705, + "loss": 0.7975, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00026008001155568457, + "loss": 0.7807, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.886640798226164, + "eval_loss": 0.8524655699729919, + "eval_runtime": 6.4893, + "eval_samples_per_second": 555.993, + "eval_steps_per_second": 8.784, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003034266801482987, + "loss": 0.7784, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003467733487409128, + "loss": 0.7811, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.8227518796920776, + "eval_runtime": 6.7829, + "eval_samples_per_second": 531.922, + "eval_steps_per_second": 8.403, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0003901200173335269, + "loss": 0.7778, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8849778270509978, + "eval_loss": 0.8600122332572937, + "eval_runtime": 6.7819, + "eval_samples_per_second": 532.002, + "eval_steps_per_second": 8.405, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.000433466685926141, + "loss": 0.778, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0004768133545187551, + "loss": 0.7838, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.823170731707317, + "eval_loss": 0.9457823038101196, + "eval_runtime": 6.3648, + "eval_samples_per_second": 566.866, + "eval_steps_per_second": 8.955, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0005184914230224401, + "loss": 0.7852, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0005174579671776652, + "loss": 0.7806, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8883037694013304, + "eval_loss": 0.8512657880783081, + "eval_runtime": 6.5565, + "eval_samples_per_second": 550.294, + "eval_steps_per_second": 8.694, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0005145128033910003, + "loss": 0.7795, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8772172949002217, + "eval_loss": 0.883333683013916, + "eval_runtime": 6.4677, + "eval_samples_per_second": 557.851, + "eval_steps_per_second": 8.813, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0005096777366246994, + "loss": 0.7849, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0005029885640214596, + "loss": 0.7765, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8763858093126385, + "eval_loss": 0.8683876991271973, + "eval_runtime": 6.8326, + "eval_samples_per_second": 528.06, + "eval_steps_per_second": 8.342, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0004944948098748995, + "loss": 0.7669, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8985587583148559, + "eval_loss": 0.8437188863754272, + "eval_runtime": 6.39, + "eval_samples_per_second": 564.628, + "eval_steps_per_second": 8.92, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00048425935896901723, + "loss": 0.757, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00047235799100125064, + "loss": 0.7564, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8932926829268293, + "eval_loss": 0.8618698716163635, + "eval_runtime": 6.3738, + "eval_samples_per_second": 566.071, + "eval_steps_per_second": 8.943, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0004588788195361109, + "loss": 0.7639, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00044392163964317876, + "loss": 0.7586, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8874722838137472, + "eval_loss": 0.8697918653488159, + "eval_runtime": 6.674, + "eval_samples_per_second": 540.604, + "eval_steps_per_second": 8.541, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0004275971890493246, + "loss": 0.7501, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.876940133037694, + "eval_loss": 0.8900468349456787, + "eval_runtime": 6.7296, + "eval_samples_per_second": 536.137, + "eval_steps_per_second": 8.47, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0004100263282753227, + "loss": 0.7583, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003913391458268404, + "loss": 0.7368, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9021618625277162, + "eval_loss": 0.8278179168701172, + "eval_runtime": 6.4986, + "eval_samples_per_second": 555.194, + "eval_steps_per_second": 8.771, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0003716739950646564, + "loss": 0.7523, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0003511764698847832, + "loss": 0.7404, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9013303769401331, + "eval_loss": 0.8269362449645996, + "eval_runtime": 6.6252, + "eval_samples_per_second": 544.589, + "eval_steps_per_second": 8.604, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00032999832679219933, + "loss": 0.7347, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9041019955654102, + "eval_loss": 0.8276650309562683, + "eval_runtime": 6.6993, + "eval_samples_per_second": 538.561, + "eval_steps_per_second": 8.508, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00030829636134878206, + "loss": 0.7314, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00028623124731382477, + "loss": 0.7246, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9099223946784922, + "eval_loss": 0.8182592391967773, + "eval_runtime": 6.6359, + "eval_samples_per_second": 543.705, + "eval_steps_per_second": 8.59, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0002639663470717383, + "loss": 0.7192, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00024166650215411175, + "loss": 0.7217, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8261517286300659, + "eval_runtime": 6.7318, + "eval_samples_per_second": 535.963, + "eval_steps_per_second": 8.467, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00021949681281068596, + "loss": 0.7281, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8152443766593933, + "eval_runtime": 6.637, + "eval_samples_per_second": 543.62, + "eval_steps_per_second": 8.588, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00019762141566486785, + "loss": 0.71, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00017620226850359705, + "loss": 0.7117, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8220065236091614, + "eval_runtime": 6.8002, + "eval_samples_per_second": 530.572, + "eval_steps_per_second": 8.382, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00015539795119855538, + "loss": 0.7061, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9068736141906873, + "eval_loss": 0.8264269828796387, + "eval_runtime": 6.6227, + "eval_samples_per_second": 544.792, + "eval_steps_per_second": 8.607, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0001353624916362792, + "loss": 0.7066, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00011624422534957707, + "loss": 0.7043, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8148996829986572, + "eval_runtime": 6.568, + "eval_samples_per_second": 549.332, + "eval_steps_per_second": 8.678, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 9.81846972931429e-05, + "loss": 0.7012, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 8.131761389423363e-05, + "loss": 0.6985, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8143835067749023, + "eval_runtime": 6.601, + "eval_samples_per_second": 546.582, + "eval_steps_per_second": 8.635, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 6.576785313706155e-05, + "loss": 0.6977, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8138031959533691, + "eval_runtime": 6.8329, + "eval_samples_per_second": 528.033, + "eval_steps_per_second": 8.342, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 5.1650540009890064e-05, + "loss": 0.6949, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 3.9070194159899655e-05, + "loss": 0.6923, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8191729784011841, + "eval_runtime": 6.7911, + "eval_samples_per_second": 531.286, + "eval_steps_per_second": 8.393, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 2.811995606628784e-05, + "loss": 0.6926, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.8880897460748494e-05, + "loss": 0.6933, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8060517907142639, + "eval_runtime": 6.4302, + "eval_samples_per_second": 561.105, + "eval_steps_per_second": 8.864, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.1421421100734875e-05, + "loss": 0.6931, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8159933090209961, + "eval_runtime": 6.6245, + "eval_samples_per_second": 544.644, + "eval_steps_per_second": 8.604, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 5.7967543393753565e-06, + "loss": 0.6873, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 2.048540241473269e-06, + "loss": 0.694, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8138334155082703, + "eval_runtime": 6.7328, + "eval_samples_per_second": 535.883, + "eval_steps_per_second": 8.466, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0005184928435501148, + "metric": "eval/loss", + "warmup_ratio": 0.24608524478152627 + } +} diff --git a/run-8rke1nw3/checkpoint-1232/training_args.bin b/run-8rke1nw3/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eccf5dcf494aea4448beeceec7256c4f951f19ff --- /dev/null +++ b/run-8rke1nw3/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc682c6cd54a918d23a4224158efcbf35d0120821ab6ff34a48d77ed1f826aa7 +size 4792 diff --git a/run-8rke1nw3/checkpoint-1260/model.safetensors b/run-8rke1nw3/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a73b43c4cb002d5788b96c0f65aa704bfa4c04e5 --- /dev/null +++ b/run-8rke1nw3/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e35fe5f6ca3c3c972a6552589f970b5468b21a6452457ecbd69c462a3b73f324 +size 198025308 diff --git a/run-8rke1nw3/checkpoint-1260/optimizer.pt b/run-8rke1nw3/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9aae776b8e0bab16f8262563714289c7bf619ea8 --- /dev/null +++ b/run-8rke1nw3/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e874e92433928c20e445f0d87f037ea8193aec8fe0ebc21f1b6bbcb23d8bae5 +size 395900602 diff --git a/run-8rke1nw3/checkpoint-1260/rng_state.pth b/run-8rke1nw3/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-8rke1nw3/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-8rke1nw3/checkpoint-1260/scheduler.pt b/run-8rke1nw3/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b4b5491de345f96c5639b049be3977e6b382413 --- /dev/null +++ b/run-8rke1nw3/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39663e82a296d301f15a78741fe0c6f9f3d11b9eb1d055e003b5f54f081eebbe +size 1064 diff --git a/run-8rke1nw3/checkpoint-1260/trainer_state.json b/run-8rke1nw3/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..337d6ef0ac95580c7d4bd9c73160288b25249d5b --- /dev/null +++ b/run-8rke1nw3/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9215631929046563, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-8rke1nw3/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.33466685926141e-05, + "loss": 1.2704, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8766629711751663, + "eval_loss": 0.9036412835121155, + "eval_runtime": 6.7285, + "eval_samples_per_second": 536.23, + "eval_steps_per_second": 8.471, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 8.66933371852282e-05, + "loss": 0.9138, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00013004000577784228, + "loss": 0.8229, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9013303769401331, + "eval_loss": 0.8313962817192078, + "eval_runtime": 6.7262, + "eval_samples_per_second": 536.408, + "eval_steps_per_second": 8.474, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001733866743704564, + "loss": 0.8059, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8248089551925659, + "eval_runtime": 6.6651, + "eval_samples_per_second": 541.329, + "eval_steps_per_second": 8.552, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0002167333429630705, + "loss": 0.7975, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00026008001155568457, + "loss": 0.7807, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.886640798226164, + "eval_loss": 0.8524655699729919, + "eval_runtime": 6.4893, + "eval_samples_per_second": 555.993, + "eval_steps_per_second": 8.784, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003034266801482987, + "loss": 0.7784, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003467733487409128, + "loss": 0.7811, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.8227518796920776, + "eval_runtime": 6.7829, + "eval_samples_per_second": 531.922, + "eval_steps_per_second": 8.403, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0003901200173335269, + "loss": 0.7778, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8849778270509978, + "eval_loss": 0.8600122332572937, + "eval_runtime": 6.7819, + "eval_samples_per_second": 532.002, + "eval_steps_per_second": 8.405, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.000433466685926141, + "loss": 0.778, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0004768133545187551, + "loss": 0.7838, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.823170731707317, + "eval_loss": 0.9457823038101196, + "eval_runtime": 6.3648, + "eval_samples_per_second": 566.866, + "eval_steps_per_second": 8.955, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0005184914230224401, + "loss": 0.7852, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0005174579671776652, + "loss": 0.7806, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8883037694013304, + "eval_loss": 0.8512657880783081, + "eval_runtime": 6.5565, + "eval_samples_per_second": 550.294, + "eval_steps_per_second": 8.694, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0005145128033910003, + "loss": 0.7795, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8772172949002217, + "eval_loss": 0.883333683013916, + "eval_runtime": 6.4677, + "eval_samples_per_second": 557.851, + "eval_steps_per_second": 8.813, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0005096777366246994, + "loss": 0.7849, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0005029885640214596, + "loss": 0.7765, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8763858093126385, + "eval_loss": 0.8683876991271973, + "eval_runtime": 6.8326, + "eval_samples_per_second": 528.06, + "eval_steps_per_second": 8.342, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0004944948098748995, + "loss": 0.7669, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8985587583148559, + "eval_loss": 0.8437188863754272, + "eval_runtime": 6.39, + "eval_samples_per_second": 564.628, + "eval_steps_per_second": 8.92, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00048425935896901723, + "loss": 0.757, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00047235799100125064, + "loss": 0.7564, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8932926829268293, + "eval_loss": 0.8618698716163635, + "eval_runtime": 6.3738, + "eval_samples_per_second": 566.071, + "eval_steps_per_second": 8.943, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0004588788195361109, + "loss": 0.7639, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00044392163964317876, + "loss": 0.7586, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8874722838137472, + "eval_loss": 0.8697918653488159, + "eval_runtime": 6.674, + "eval_samples_per_second": 540.604, + "eval_steps_per_second": 8.541, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0004275971890493246, + "loss": 0.7501, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.876940133037694, + "eval_loss": 0.8900468349456787, + "eval_runtime": 6.7296, + "eval_samples_per_second": 536.137, + "eval_steps_per_second": 8.47, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0004100263282753227, + "loss": 0.7583, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003913391458268404, + "loss": 0.7368, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9021618625277162, + "eval_loss": 0.8278179168701172, + "eval_runtime": 6.4986, + "eval_samples_per_second": 555.194, + "eval_steps_per_second": 8.771, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0003716739950646564, + "loss": 0.7523, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0003511764698847832, + "loss": 0.7404, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9013303769401331, + "eval_loss": 0.8269362449645996, + "eval_runtime": 6.6252, + "eval_samples_per_second": 544.589, + "eval_steps_per_second": 8.604, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00032999832679219933, + "loss": 0.7347, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9041019955654102, + "eval_loss": 0.8276650309562683, + "eval_runtime": 6.6993, + "eval_samples_per_second": 538.561, + "eval_steps_per_second": 8.508, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00030829636134878206, + "loss": 0.7314, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00028623124731382477, + "loss": 0.7246, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9099223946784922, + "eval_loss": 0.8182592391967773, + "eval_runtime": 6.6359, + "eval_samples_per_second": 543.705, + "eval_steps_per_second": 8.59, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0002639663470717383, + "loss": 0.7192, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00024166650215411175, + "loss": 0.7217, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8261517286300659, + "eval_runtime": 6.7318, + "eval_samples_per_second": 535.963, + "eval_steps_per_second": 8.467, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00021949681281068596, + "loss": 0.7281, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8152443766593933, + "eval_runtime": 6.637, + "eval_samples_per_second": 543.62, + "eval_steps_per_second": 8.588, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00019762141566486785, + "loss": 0.71, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00017620226850359705, + "loss": 0.7117, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8220065236091614, + "eval_runtime": 6.8002, + "eval_samples_per_second": 530.572, + "eval_steps_per_second": 8.382, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00015539795119855538, + "loss": 0.7061, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9068736141906873, + "eval_loss": 0.8264269828796387, + "eval_runtime": 6.6227, + "eval_samples_per_second": 544.792, + "eval_steps_per_second": 8.607, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0001353624916362792, + "loss": 0.7066, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00011624422534957707, + "loss": 0.7043, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8148996829986572, + "eval_runtime": 6.568, + "eval_samples_per_second": 549.332, + "eval_steps_per_second": 8.678, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 9.81846972931429e-05, + "loss": 0.7012, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 8.131761389423363e-05, + "loss": 0.6985, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8143835067749023, + "eval_runtime": 6.601, + "eval_samples_per_second": 546.582, + "eval_steps_per_second": 8.635, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 6.576785313706155e-05, + "loss": 0.6977, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8138031959533691, + "eval_runtime": 6.8329, + "eval_samples_per_second": 528.033, + "eval_steps_per_second": 8.342, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 5.1650540009890064e-05, + "loss": 0.6949, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 3.9070194159899655e-05, + "loss": 0.6923, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8191729784011841, + "eval_runtime": 6.7911, + "eval_samples_per_second": 531.286, + "eval_steps_per_second": 8.393, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 2.811995606628784e-05, + "loss": 0.6926, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.8880897460748494e-05, + "loss": 0.6933, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8060517907142639, + "eval_runtime": 6.4302, + "eval_samples_per_second": 561.105, + "eval_steps_per_second": 8.864, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.1421421100734875e-05, + "loss": 0.6931, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8159933090209961, + "eval_runtime": 6.6245, + "eval_samples_per_second": 544.644, + "eval_steps_per_second": 8.604, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 5.7967543393753565e-06, + "loss": 0.6873, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 2.048540241473269e-06, + "loss": 0.694, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8138334155082703, + "eval_runtime": 6.7328, + "eval_samples_per_second": 535.883, + "eval_steps_per_second": 8.466, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 2.0452927282481352e-07, + "loss": 0.6909, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9215631929046563, + "eval_loss": 0.804080605506897, + "eval_runtime": 6.5394, + "eval_samples_per_second": 551.73, + "eval_steps_per_second": 8.716, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0005184928435501148, + "metric": "eval/loss", + "warmup_ratio": 0.24608524478152627 + } +} diff --git a/run-8rke1nw3/checkpoint-1260/training_args.bin b/run-8rke1nw3/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..eccf5dcf494aea4448beeceec7256c4f951f19ff --- /dev/null +++ b/run-8rke1nw3/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc682c6cd54a918d23a4224158efcbf35d0120821ab6ff34a48d77ed1f826aa7 +size 4792 diff --git a/run-92oboknm/checkpoint-595/model.safetensors b/run-92oboknm/checkpoint-595/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e2aad90a6df3cec975e4de472b2c652573de132 --- /dev/null +++ b/run-92oboknm/checkpoint-595/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf358abbd1cfc22ea2c81525dc5c7854899609fbd22844c5bbd36062e7c9883 +size 198025308 diff --git a/run-92oboknm/checkpoint-595/optimizer.pt b/run-92oboknm/checkpoint-595/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..895ea86c7c84358c7e0dbbf1bf5921a7de8bc284 --- /dev/null +++ b/run-92oboknm/checkpoint-595/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f249d235853ef24061000da4e17cfb1c7746aad95d7656c4742d4c4f5c2fa072 +size 395900602 diff --git a/run-92oboknm/checkpoint-595/rng_state.pth b/run-92oboknm/checkpoint-595/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b2798d3ef22ba33b35deea6a8c61abbb56099a6 --- /dev/null +++ b/run-92oboknm/checkpoint-595/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d4b46d5b7fd917d05ccb48b8b2f6f0c7b9f5cfd5e53675d2f6391274fc4f7a5 +size 14244 diff --git a/run-92oboknm/checkpoint-595/scheduler.pt b/run-92oboknm/checkpoint-595/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..26cd8ff15e9b447b99aee906df3595b6e238aeb9 --- /dev/null +++ b/run-92oboknm/checkpoint-595/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:957ccc10ca440b67d0792e28087b47a3a516e1a9326f742632bbaa8eeb82e8e4 +size 1064 diff --git a/run-92oboknm/checkpoint-595/trainer_state.json b/run-92oboknm/checkpoint-595/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f9873983deb7fbe849d783305915cc374498e1ef --- /dev/null +++ b/run-92oboknm/checkpoint-595/trainer_state.json @@ -0,0 +1,634 @@ +{ + "best_metric": 0.9137111808388728, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-92oboknm/checkpoint-595", + "epoch": 28.0, + "eval_steps": 500, + "global_step": 595, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 0.00013488856153311848, + "loss": 1.2398, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.045784592628479, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.3406, + "eval_samples_per_second": 432.583, + "eval_steps_per_second": 3.477, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026977712306623696, + "loss": 0.8818, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00040466568459935547, + "loss": 0.8119, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8772172949002217, + "eval_f1": 0.8499743438000013, + "eval_loss": 0.8706405758857727, + "eval_precision": 0.8687357861135251, + "eval_recall": 0.8772172949002217, + "eval_runtime": 8.1533, + "eval_samples_per_second": 442.523, + "eval_steps_per_second": 3.557, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005395542461324739, + "loss": 0.8018, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8830376940133038, + "eval_f1": 0.8788475758193338, + "eval_loss": 0.8566984534263611, + "eval_precision": 0.8830385473356896, + "eval_recall": 0.8830376940133038, + "eval_runtime": 7.9251, + "eval_samples_per_second": 455.261, + "eval_steps_per_second": 3.659, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0006744428076655924, + "loss": 0.7928, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.0008093313691987109, + "loss": 0.8058, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7777161862527716, + "eval_f1": 0.7951625172866691, + "eval_loss": 0.9944904446601868, + "eval_precision": 0.8535543807776295, + "eval_recall": 0.7777161862527716, + "eval_runtime": 7.7448, + "eval_samples_per_second": 465.86, + "eval_steps_per_second": 3.744, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0009442199307318293, + "loss": 0.8185, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.000985035971915022, + "loss": 0.7951, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8417405764966741, + "eval_f1": 0.7826333225779587, + "eval_loss": 0.9602479934692383, + "eval_precision": 0.8107999784900226, + "eval_recall": 0.8417405764966741, + "eval_runtime": 7.4885, + "eval_samples_per_second": 481.808, + "eval_steps_per_second": 3.873, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.000981617064398175, + "loss": 0.8131, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8592017738359202, + "eval_f1": 0.8186613810283488, + "eval_loss": 0.9116799831390381, + "eval_precision": 0.8538607584300889, + "eval_recall": 0.8592017738359202, + "eval_runtime": 7.8779, + "eval_samples_per_second": 457.992, + "eval_steps_per_second": 3.681, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0009753513421949541, + "loss": 0.8059, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0009662753007878153, + "loss": 0.8039, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.858370288248337, + "eval_f1": 0.8183767977736109, + "eval_loss": 0.9269529581069946, + "eval_precision": 0.8097802379478725, + "eval_recall": 0.858370288248337, + "eval_runtime": 7.888, + "eval_samples_per_second": 457.402, + "eval_steps_per_second": 3.676, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0009544418047148268, + "loss": 0.8193, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0009399197796535576, + "loss": 0.8016, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8675166297117517, + "eval_f1": 0.8343369305368533, + "eval_loss": 0.9091225862503052, + "eval_precision": 0.8482452175806302, + "eval_recall": 0.8675166297117517, + "eval_runtime": 8.0557, + "eval_samples_per_second": 447.88, + "eval_steps_per_second": 3.6, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0009227938109548503, + "loss": 0.7884, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8799889135254989, + "eval_f1": 0.8564471863494832, + "eval_loss": 0.8644145727157593, + "eval_precision": 0.8671490099867957, + "eval_recall": 0.8799889135254989, + "eval_runtime": 7.9278, + "eval_samples_per_second": 455.107, + "eval_steps_per_second": 3.658, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0009031636509648676, + "loss": 0.7973, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0008811436380050787, + "loss": 0.7862, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8691796008869179, + "eval_f1": 0.8584271735765395, + "eval_loss": 0.8791645765304565, + "eval_precision": 0.8681836132174782, + "eval_recall": 0.8691796008869179, + "eval_runtime": 7.6869, + "eval_samples_per_second": 469.369, + "eval_steps_per_second": 3.773, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0008568620303944095, + "loss": 0.7775, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8334257206208425, + "eval_f1": 0.8435769323900544, + "eval_loss": 0.9313474893569946, + "eval_precision": 0.864479426487364, + "eval_recall": 0.8334257206208425, + "eval_runtime": 7.6296, + "eval_samples_per_second": 472.895, + "eval_steps_per_second": 3.801, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0008304602593926256, + "loss": 0.7898, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0008020921054162751, + "loss": 0.7779, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8932926829268293, + "eval_f1": 0.8814895458005204, + "eval_loss": 0.8481806516647339, + "eval_precision": 0.879799231352764, + "eval_recall": 0.8932926829268293, + "eval_runtime": 7.6146, + "eval_samples_per_second": 473.828, + "eval_steps_per_second": 3.808, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0007719228023254234, + "loss": 0.7713, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0007401280749983737, + "loss": 0.7641, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.878880266075388, + "eval_f1": 0.8636692999248561, + "eval_loss": 0.8649066686630249, + "eval_precision": 0.870614367421346, + "eval_recall": 0.878880266075388, + "eval_runtime": 8.1109, + "eval_samples_per_second": 444.836, + "eval_steps_per_second": 3.575, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0007068931158001399, + "loss": 0.7575, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8744456762749445, + "eval_f1": 0.851364495363969, + "eval_loss": 0.8923865556716919, + "eval_precision": 0.8699166882642951, + "eval_recall": 0.8744456762749445, + "eval_runtime": 8.189, + "eval_samples_per_second": 440.594, + "eval_steps_per_second": 3.541, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0006724115059063611, + "loss": 0.7596, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0006368840877655363, + "loss": 0.7508, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8938470066518847, + "eval_f1": 0.8894066916239922, + "eval_loss": 0.8343715667724609, + "eval_precision": 0.8877000147563511, + "eval_recall": 0.8938470066518847, + "eval_runtime": 7.9213, + "eval_samples_per_second": 455.48, + "eval_steps_per_second": 3.661, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00060051779526707, + "loss": 0.746, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.0005635244484289588, + "loss": 0.7439, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.883869179600887, + "eval_f1": 0.8772652536165246, + "eval_loss": 0.852491557598114, + "eval_precision": 0.8860327141309204, + "eval_recall": 0.883869179600887, + "eval_runtime": 8.1513, + "eval_samples_per_second": 442.627, + "eval_steps_per_second": 3.558, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0005261195196256082, + "loss": 0.7406, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8930155210643016, + "eval_f1": 0.8836317449440728, + "eval_loss": 0.8441163897514343, + "eval_precision": 0.8800233838384035, + "eval_recall": 0.8930155210643016, + "eval_runtime": 8.0592, + "eval_samples_per_second": 447.684, + "eval_steps_per_second": 3.598, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.000488520878542046, + "loss": 0.7358, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.0004509475231646982, + "loss": 0.7323, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9057649667405765, + "eval_f1": 0.8972145839053935, + "eval_loss": 0.832291305065155, + "eval_precision": 0.8948098372945593, + "eval_recall": 0.9057649667405765, + "eval_runtime": 7.7895, + "eval_samples_per_second": 463.188, + "eval_steps_per_second": 3.723, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00041361830420023324, + "loss": 0.7235, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00037675065035225525, + "loss": 0.7228, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.9011575281967699, + "eval_loss": 0.8206110000610352, + "eval_precision": 0.8987110156664404, + "eval_recall": 0.9065964523281597, + "eval_runtime": 7.9425, + "eval_samples_per_second": 454.267, + "eval_steps_per_second": 3.651, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.0003405593018806322, + "loss": 0.7136, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8966186252771619, + "eval_f1": 0.8906007487833402, + "eval_loss": 0.8425031900405884, + "eval_precision": 0.8863971354806269, + "eval_recall": 0.8966186252771619, + "eval_runtime": 7.932, + "eval_samples_per_second": 454.869, + "eval_steps_per_second": 3.656, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00030525505981999996, + "loss": 0.7141, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.0002710435581427782, + "loss": 0.713, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.9034967280293431, + "eval_loss": 0.8197495937347412, + "eval_precision": 0.9011023860348114, + "eval_recall": 0.9090909090909091, + "eval_runtime": 7.7649, + "eval_samples_per_second": 464.655, + "eval_steps_per_second": 3.735, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.00023812406601838335, + "loss": 0.7119, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.905210643015521, + "eval_f1": 0.9013831857062395, + "eval_loss": 0.8243157863616943, + "eval_precision": 0.8991561375156918, + "eval_recall": 0.905210643015521, + "eval_runtime": 7.9566, + "eval_samples_per_second": 453.463, + "eval_steps_per_second": 3.645, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00020668832714503568, + "loss": 0.7069, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 0.00017691944291461218, + "loss": 0.7051, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9123160993006171, + "eval_loss": 0.8127403259277344, + "eval_precision": 0.9118114800112483, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.8581, + "eval_samples_per_second": 459.141, + "eval_steps_per_second": 3.69, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 0.0001489908059156937, + "loss": 0.7033, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 0.00012306508998675232, + "loss": 0.7002, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.9062075329822011, + "eval_loss": 0.8152052760124207, + "eval_precision": 0.9042621737871144, + "eval_recall": 0.9107538802660754, + "eval_runtime": 8.2181, + "eval_samples_per_second": 439.029, + "eval_steps_per_second": 3.529, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 9.92933027020373e-05, + "loss": 0.6968, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9105297063937463, + "eval_loss": 0.81611567735672, + "eval_precision": 0.908721155043131, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.0374, + "eval_samples_per_second": 448.901, + "eval_steps_per_second": 3.608, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 7.781390580907614e-05, + "loss": 0.6954, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 5.875200874091341e-05, + "loss": 0.6964, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9083039777969174, + "eval_loss": 0.8146695494651794, + "eval_precision": 0.906810828915004, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.6095, + "eval_samples_per_second": 474.146, + "eval_steps_per_second": 3.811, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 4.221863990057346e-05, + "loss": 0.6969, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 2.831009996224342e-05, + "loss": 0.6947, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9046563192904656, + "eval_f1": 0.9008465187648906, + "eval_loss": 0.8239182233810425, + "eval_precision": 0.8986014606815587, + "eval_recall": 0.9046563192904656, + "eval_runtime": 7.6077, + "eval_samples_per_second": 474.259, + "eval_steps_per_second": 3.812, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.7107400955955274e-05, + "loss": 0.6948, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9137111808388728, + "eval_loss": 0.8096845746040344, + "eval_precision": 0.9118322532518156, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.7392, + "eval_samples_per_second": 466.197, + "eval_steps_per_second": 3.747, + "step": 595 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.2410619127424507, + "learning_rate": 0.0009857241035112505, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-92oboknm/checkpoint-595/training_args.bin b/run-92oboknm/checkpoint-595/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a55d745c8e73d9ac7657163816a1b88cc4cfc7fc --- /dev/null +++ b/run-92oboknm/checkpoint-595/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79eef4532b7949ea238f4b16f0a9465ac9cb35d9a2375b7995abc62f6a5d2eaa +size 4792 diff --git a/run-92oboknm/checkpoint-630/model.safetensors b/run-92oboknm/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2089098ce9511fbbd46fff4810c5af5228c23f81 --- /dev/null +++ b/run-92oboknm/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b19afefeb8ecbbefb7f39c1d4810fbc0ea99e82463194946b2c18f426ba1ece0 +size 198025308 diff --git a/run-92oboknm/checkpoint-630/optimizer.pt b/run-92oboknm/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fafb756fa603d7c46ea3e4833c96dbdac6e5f384 --- /dev/null +++ b/run-92oboknm/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97fd23bfcb9ae90b3b31cb79b8e2e53a8e7ce44495550046f483ed1b183d411a +size 395900602 diff --git a/run-92oboknm/checkpoint-630/rng_state.pth b/run-92oboknm/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-92oboknm/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-92oboknm/checkpoint-630/scheduler.pt b/run-92oboknm/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d75eb563f85a315d98f1d3e006014bac3d03696d --- /dev/null +++ b/run-92oboknm/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fd95cefda2608e082ab5690e82b08c13ce8e40a96d963106d83bd65c6710298 +size 1064 diff --git a/run-92oboknm/checkpoint-630/trainer_state.json b/run-92oboknm/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2da79255323eef1c24b03dea06996bd008f0bcc2 --- /dev/null +++ b/run-92oboknm/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9137111808388728, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-92oboknm/checkpoint-595", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 0.00013488856153311848, + "loss": 1.2398, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.045784592628479, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.3406, + "eval_samples_per_second": 432.583, + "eval_steps_per_second": 3.477, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00026977712306623696, + "loss": 0.8818, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00040466568459935547, + "loss": 0.8119, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8772172949002217, + "eval_f1": 0.8499743438000013, + "eval_loss": 0.8706405758857727, + "eval_precision": 0.8687357861135251, + "eval_recall": 0.8772172949002217, + "eval_runtime": 8.1533, + "eval_samples_per_second": 442.523, + "eval_steps_per_second": 3.557, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005395542461324739, + "loss": 0.8018, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8830376940133038, + "eval_f1": 0.8788475758193338, + "eval_loss": 0.8566984534263611, + "eval_precision": 0.8830385473356896, + "eval_recall": 0.8830376940133038, + "eval_runtime": 7.9251, + "eval_samples_per_second": 455.261, + "eval_steps_per_second": 3.659, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0006744428076655924, + "loss": 0.7928, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.0008093313691987109, + "loss": 0.8058, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7777161862527716, + "eval_f1": 0.7951625172866691, + "eval_loss": 0.9944904446601868, + "eval_precision": 0.8535543807776295, + "eval_recall": 0.7777161862527716, + "eval_runtime": 7.7448, + "eval_samples_per_second": 465.86, + "eval_steps_per_second": 3.744, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0009442199307318293, + "loss": 0.8185, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.000985035971915022, + "loss": 0.7951, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8417405764966741, + "eval_f1": 0.7826333225779587, + "eval_loss": 0.9602479934692383, + "eval_precision": 0.8107999784900226, + "eval_recall": 0.8417405764966741, + "eval_runtime": 7.4885, + "eval_samples_per_second": 481.808, + "eval_steps_per_second": 3.873, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.000981617064398175, + "loss": 0.8131, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8592017738359202, + "eval_f1": 0.8186613810283488, + "eval_loss": 0.9116799831390381, + "eval_precision": 0.8538607584300889, + "eval_recall": 0.8592017738359202, + "eval_runtime": 7.8779, + "eval_samples_per_second": 457.992, + "eval_steps_per_second": 3.681, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0009753513421949541, + "loss": 0.8059, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0009662753007878153, + "loss": 0.8039, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.858370288248337, + "eval_f1": 0.8183767977736109, + "eval_loss": 0.9269529581069946, + "eval_precision": 0.8097802379478725, + "eval_recall": 0.858370288248337, + "eval_runtime": 7.888, + "eval_samples_per_second": 457.402, + "eval_steps_per_second": 3.676, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0009544418047148268, + "loss": 0.8193, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0009399197796535576, + "loss": 0.8016, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8675166297117517, + "eval_f1": 0.8343369305368533, + "eval_loss": 0.9091225862503052, + "eval_precision": 0.8482452175806302, + "eval_recall": 0.8675166297117517, + "eval_runtime": 8.0557, + "eval_samples_per_second": 447.88, + "eval_steps_per_second": 3.6, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0009227938109548503, + "loss": 0.7884, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8799889135254989, + "eval_f1": 0.8564471863494832, + "eval_loss": 0.8644145727157593, + "eval_precision": 0.8671490099867957, + "eval_recall": 0.8799889135254989, + "eval_runtime": 7.9278, + "eval_samples_per_second": 455.107, + "eval_steps_per_second": 3.658, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0009031636509648676, + "loss": 0.7973, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0008811436380050787, + "loss": 0.7862, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8691796008869179, + "eval_f1": 0.8584271735765395, + "eval_loss": 0.8791645765304565, + "eval_precision": 0.8681836132174782, + "eval_recall": 0.8691796008869179, + "eval_runtime": 7.6869, + "eval_samples_per_second": 469.369, + "eval_steps_per_second": 3.773, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0008568620303944095, + "loss": 0.7775, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8334257206208425, + "eval_f1": 0.8435769323900544, + "eval_loss": 0.9313474893569946, + "eval_precision": 0.864479426487364, + "eval_recall": 0.8334257206208425, + "eval_runtime": 7.6296, + "eval_samples_per_second": 472.895, + "eval_steps_per_second": 3.801, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0008304602593926256, + "loss": 0.7898, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0008020921054162751, + "loss": 0.7779, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8932926829268293, + "eval_f1": 0.8814895458005204, + "eval_loss": 0.8481806516647339, + "eval_precision": 0.879799231352764, + "eval_recall": 0.8932926829268293, + "eval_runtime": 7.6146, + "eval_samples_per_second": 473.828, + "eval_steps_per_second": 3.808, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0007719228023254234, + "loss": 0.7713, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0007401280749983737, + "loss": 0.7641, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.878880266075388, + "eval_f1": 0.8636692999248561, + "eval_loss": 0.8649066686630249, + "eval_precision": 0.870614367421346, + "eval_recall": 0.878880266075388, + "eval_runtime": 8.1109, + "eval_samples_per_second": 444.836, + "eval_steps_per_second": 3.575, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0007068931158001399, + "loss": 0.7575, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8744456762749445, + "eval_f1": 0.851364495363969, + "eval_loss": 0.8923865556716919, + "eval_precision": 0.8699166882642951, + "eval_recall": 0.8744456762749445, + "eval_runtime": 8.189, + "eval_samples_per_second": 440.594, + "eval_steps_per_second": 3.541, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0006724115059063611, + "loss": 0.7596, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0006368840877655363, + "loss": 0.7508, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8938470066518847, + "eval_f1": 0.8894066916239922, + "eval_loss": 0.8343715667724609, + "eval_precision": 0.8877000147563511, + "eval_recall": 0.8938470066518847, + "eval_runtime": 7.9213, + "eval_samples_per_second": 455.48, + "eval_steps_per_second": 3.661, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00060051779526707, + "loss": 0.746, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.0005635244484289588, + "loss": 0.7439, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.883869179600887, + "eval_f1": 0.8772652536165246, + "eval_loss": 0.852491557598114, + "eval_precision": 0.8860327141309204, + "eval_recall": 0.883869179600887, + "eval_runtime": 8.1513, + "eval_samples_per_second": 442.627, + "eval_steps_per_second": 3.558, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0005261195196256082, + "loss": 0.7406, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8930155210643016, + "eval_f1": 0.8836317449440728, + "eval_loss": 0.8441163897514343, + "eval_precision": 0.8800233838384035, + "eval_recall": 0.8930155210643016, + "eval_runtime": 8.0592, + "eval_samples_per_second": 447.684, + "eval_steps_per_second": 3.598, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.000488520878542046, + "loss": 0.7358, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.0004509475231646982, + "loss": 0.7323, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9057649667405765, + "eval_f1": 0.8972145839053935, + "eval_loss": 0.832291305065155, + "eval_precision": 0.8948098372945593, + "eval_recall": 0.9057649667405765, + "eval_runtime": 7.7895, + "eval_samples_per_second": 463.188, + "eval_steps_per_second": 3.723, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00041361830420023324, + "loss": 0.7235, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00037675065035225525, + "loss": 0.7228, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.9011575281967699, + "eval_loss": 0.8206110000610352, + "eval_precision": 0.8987110156664404, + "eval_recall": 0.9065964523281597, + "eval_runtime": 7.9425, + "eval_samples_per_second": 454.267, + "eval_steps_per_second": 3.651, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.0003405593018806322, + "loss": 0.7136, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8966186252771619, + "eval_f1": 0.8906007487833402, + "eval_loss": 0.8425031900405884, + "eval_precision": 0.8863971354806269, + "eval_recall": 0.8966186252771619, + "eval_runtime": 7.932, + "eval_samples_per_second": 454.869, + "eval_steps_per_second": 3.656, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00030525505981999996, + "loss": 0.7141, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.0002710435581427782, + "loss": 0.713, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.9034967280293431, + "eval_loss": 0.8197495937347412, + "eval_precision": 0.9011023860348114, + "eval_recall": 0.9090909090909091, + "eval_runtime": 7.7649, + "eval_samples_per_second": 464.655, + "eval_steps_per_second": 3.735, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.00023812406601838335, + "loss": 0.7119, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.905210643015521, + "eval_f1": 0.9013831857062395, + "eval_loss": 0.8243157863616943, + "eval_precision": 0.8991561375156918, + "eval_recall": 0.905210643015521, + "eval_runtime": 7.9566, + "eval_samples_per_second": 453.463, + "eval_steps_per_second": 3.645, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00020668832714503568, + "loss": 0.7069, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 0.00017691944291461218, + "loss": 0.7051, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9123160993006171, + "eval_loss": 0.8127403259277344, + "eval_precision": 0.9118114800112483, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.8581, + "eval_samples_per_second": 459.141, + "eval_steps_per_second": 3.69, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 0.0001489908059156937, + "loss": 0.7033, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 0.00012306508998675232, + "loss": 0.7002, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.9062075329822011, + "eval_loss": 0.8152052760124207, + "eval_precision": 0.9042621737871144, + "eval_recall": 0.9107538802660754, + "eval_runtime": 8.2181, + "eval_samples_per_second": 439.029, + "eval_steps_per_second": 3.529, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 9.92933027020373e-05, + "loss": 0.6968, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9105297063937463, + "eval_loss": 0.81611567735672, + "eval_precision": 0.908721155043131, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.0374, + "eval_samples_per_second": 448.901, + "eval_steps_per_second": 3.608, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 7.781390580907614e-05, + "loss": 0.6954, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 5.875200874091341e-05, + "loss": 0.6964, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9083039777969174, + "eval_loss": 0.8146695494651794, + "eval_precision": 0.906810828915004, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.6095, + "eval_samples_per_second": 474.146, + "eval_steps_per_second": 3.811, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 4.221863990057346e-05, + "loss": 0.6969, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 2.831009996224342e-05, + "loss": 0.6947, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9046563192904656, + "eval_f1": 0.9008465187648906, + "eval_loss": 0.8239182233810425, + "eval_precision": 0.8986014606815587, + "eval_recall": 0.9046563192904656, + "eval_runtime": 7.6077, + "eval_samples_per_second": 474.259, + "eval_steps_per_second": 3.812, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.7107400955955274e-05, + "loss": 0.6948, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9137111808388728, + "eval_loss": 0.8096845746040344, + "eval_precision": 0.9118322532518156, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.7392, + "eval_samples_per_second": 466.197, + "eval_steps_per_second": 3.747, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 8.6757944028867e-06, + "loss": 0.6982, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 3.06439124971668e-06, + "loss": 0.6951, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9090337197510324, + "eval_loss": 0.8118565082550049, + "eval_precision": 0.9066983077785351, + "eval_recall": 0.914079822616408, + "eval_runtime": 7.5457, + "eval_samples_per_second": 478.156, + "eval_steps_per_second": 3.843, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 3.0587581577581907e-07, + "loss": 0.692, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9079822616407982, + "eval_f1": 0.9031102983257709, + "eval_loss": 0.8225899934768677, + "eval_precision": 0.9001313252460205, + "eval_recall": 0.9079822616407982, + "eval_runtime": 7.6767, + "eval_samples_per_second": 469.993, + "eval_steps_per_second": 3.778, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.2410619127424507, + "learning_rate": 0.0009857241035112505, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-92oboknm/checkpoint-630/training_args.bin b/run-92oboknm/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a55d745c8e73d9ac7657163816a1b88cc4cfc7fc --- /dev/null +++ b/run-92oboknm/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79eef4532b7949ea238f4b16f0a9465ac9cb35d9a2375b7995abc62f6a5d2eaa +size 4792 diff --git a/run-9f6my2zq/checkpoint-1062/model.safetensors b/run-9f6my2zq/checkpoint-1062/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3966c9e17feebdaf7ee145de72eee232d56fe48 --- /dev/null +++ b/run-9f6my2zq/checkpoint-1062/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8e0d781f730216c963ed0ffc8467673157cd4d9c2edaa9960cd7bb80d017e7f +size 198025308 diff --git a/run-9f6my2zq/checkpoint-1062/optimizer.pt b/run-9f6my2zq/checkpoint-1062/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9ba764eb0d33c7298d04e75ea1b65cc85f1ea0a --- /dev/null +++ b/run-9f6my2zq/checkpoint-1062/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3151444be0101e8545feefbb18d846263d03d03386c286b0f133b9e987d2963d +size 395900602 diff --git a/run-9f6my2zq/checkpoint-1062/rng_state.pth b/run-9f6my2zq/checkpoint-1062/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5ba0322447a5f54d740a2f15d3021774620e2998 --- /dev/null +++ b/run-9f6my2zq/checkpoint-1062/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e37e38bd05b40c9b267c576e83c3f4fe9c6d38f3d89e292be83ec2774568035f +size 14244 diff --git a/run-9f6my2zq/checkpoint-1062/scheduler.pt b/run-9f6my2zq/checkpoint-1062/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7215e9279bcad8d4c5c18cbef0e929ebb1885834 --- /dev/null +++ b/run-9f6my2zq/checkpoint-1062/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b375e6dc0b78dff5406076fce1f0ad42d46cc59789086b653b0419b2e8602d4 +size 1064 diff --git a/run-9f6my2zq/checkpoint-1062/trainer_state.json b/run-9f6my2zq/checkpoint-1062/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..532bf4a61e9058ed7a847abc998e909aaff6d0f2 --- /dev/null +++ b/run-9f6my2zq/checkpoint-1062/trainer_state.json @@ -0,0 +1,492 @@ +{ + "best_metric": 0.9176829268292683, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-9f6my2zq/checkpoint-1062", + "epoch": 24.988235294117647, + "eval_steps": 500, + "global_step": 1062, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 8.071277102109182e-05, + "loss": 1.1972, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8913525498891353, + "eval_loss": 0.9123555421829224, + "eval_runtime": 6.7872, + "eval_samples_per_second": 531.592, + "eval_steps_per_second": 8.398, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00016142554204218363, + "loss": 0.8624, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00024213831306327545, + "loss": 0.8121, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8910753880266076, + "eval_loss": 0.8467855453491211, + "eval_runtime": 7.1147, + "eval_samples_per_second": 507.12, + "eval_steps_per_second": 8.012, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00032285108408436727, + "loss": 0.8046, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8963414634146342, + "eval_loss": 0.8375678062438965, + "eval_runtime": 7.2539, + "eval_samples_per_second": 497.386, + "eval_steps_per_second": 7.858, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004035638551054591, + "loss": 0.8057, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004842766261265509, + "loss": 0.7958, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8738913525498891, + "eval_loss": 0.8844189643859863, + "eval_runtime": 7.1219, + "eval_samples_per_second": 506.606, + "eval_steps_per_second": 8.003, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0005649893971476427, + "loss": 0.8044, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0006457021681687345, + "loss": 0.8071, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8813747228381374, + "eval_loss": 0.8625046014785767, + "eval_runtime": 6.76, + "eval_samples_per_second": 533.725, + "eval_steps_per_second": 8.432, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0007264149391898264, + "loss": 0.8068, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.17710643015521063, + "eval_loss": 1.831622838973999, + "eval_runtime": 7.0933, + "eval_samples_per_second": 508.647, + "eval_steps_per_second": 8.036, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0007758966338729712, + "loss": 0.8158, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0007736540524573757, + "loss": 0.811, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8658536585365854, + "eval_loss": 0.8754892349243164, + "eval_runtime": 6.921, + "eval_samples_per_second": 521.309, + "eval_steps_per_second": 8.236, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0007688907929539729, + "loss": 0.8261, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0007616379919650101, + "loss": 0.8114, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.7599778270509978, + "eval_loss": 1.017700433731079, + "eval_runtime": 7.0915, + "eval_samples_per_second": 508.776, + "eval_steps_per_second": 8.038, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0007519430597941702, + "loss": 0.808, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8594789356984479, + "eval_loss": 0.8994251489639282, + "eval_runtime": 6.7549, + "eval_samples_per_second": 534.127, + "eval_steps_per_second": 8.438, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0007398693705336277, + "loss": 0.8152, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0007254958477985876, + "loss": 0.8069, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8894124168514412, + "eval_loss": 0.8574639558792114, + "eval_runtime": 6.9472, + "eval_samples_per_second": 519.349, + "eval_steps_per_second": 8.205, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0007089164488172834, + "loss": 0.7926, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8467294900221729, + "eval_loss": 0.9047557711601257, + "eval_runtime": 7.0907, + "eval_samples_per_second": 508.836, + "eval_steps_per_second": 8.039, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0006902395502488533, + "loss": 0.7941, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.000669587239743892, + "loss": 0.7921, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8705654101995566, + "eval_loss": 0.8906881809234619, + "eval_runtime": 7.2776, + "eval_samples_per_second": 495.771, + "eval_steps_per_second": 7.832, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0006470945178786287, + "loss": 0.7968, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0006229084156795452, + "loss": 0.7877, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8830376940133038, + "eval_loss": 0.8558828830718994, + "eval_runtime": 6.7821, + "eval_samples_per_second": 531.985, + "eval_steps_per_second": 8.404, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.000597187033507026, + "loss": 0.7836, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8738913525498891, + "eval_loss": 0.8972464203834534, + "eval_runtime": 6.9952, + "eval_samples_per_second": 515.782, + "eval_steps_per_second": 8.148, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0005700985075806904, + "loss": 0.7845, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0005418199109020485, + "loss": 0.7664, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8672394678492239, + "eval_loss": 0.8879624605178833, + "eval_runtime": 6.931, + "eval_samples_per_second": 520.561, + "eval_steps_per_second": 8.224, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0005125360957589668, + "loss": 0.7748, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0004824384853782824, + "loss": 0.7648, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9024390243902439, + "eval_loss": 0.8267466425895691, + "eval_runtime": 7.0365, + "eval_samples_per_second": 512.752, + "eval_steps_per_second": 8.101, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00045172382262532876, + "loss": 0.761, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8260979056358337, + "eval_runtime": 7.0901, + "eval_samples_per_second": 508.88, + "eval_steps_per_second": 8.039, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00042059288392990693, + "loss": 0.7549, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0003892491668455435, + "loss": 0.7458, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.82014399766922, + "eval_runtime": 6.7907, + "eval_samples_per_second": 531.317, + "eval_steps_per_second": 8.394, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00035789755982123454, + "loss": 0.742, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00032674300288114687, + "loss": 0.7407, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.8205455541610718, + "eval_runtime": 7.0052, + "eval_samples_per_second": 515.048, + "eval_steps_per_second": 8.137, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00029598914796718123, + "loss": 0.7435, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.826063871383667, + "eval_runtime": 6.7289, + "eval_samples_per_second": 536.198, + "eval_steps_per_second": 8.471, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00026583702770150553, + "loss": 0.7259, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00023648374127112947, + "loss": 0.7209, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.823644757270813, + "eval_runtime": 6.978, + "eval_samples_per_second": 517.053, + "eval_steps_per_second": 8.169, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00020812116602466057, + "loss": 0.7183, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8273306488990784, + "eval_runtime": 7.0589, + "eval_samples_per_second": 511.129, + "eval_steps_per_second": 8.075, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.00018093470320331573, + "loss": 0.7227, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.0001551020660051228, + "loss": 0.7146, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8222212195396423, + "eval_runtime": 6.9661, + "eval_samples_per_second": 517.939, + "eval_steps_per_second": 8.183, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.00013079211790453124, + "loss": 0.7165, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.00010816376882113006, + "loss": 0.7093, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.824830174446106, + "eval_runtime": 6.9408, + "eval_samples_per_second": 519.822, + "eval_steps_per_second": 8.212, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 8.7364936353032e-05, + "loss": 0.7026, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8111318945884705, + "eval_runtime": 6.9516, + "eval_samples_per_second": 519.018, + "eval_steps_per_second": 8.2, + "step": 1062 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0007760843367412675, + "metric": "eval/loss", + "warmup_ratio": 0.19813274422561625 + } +} diff --git a/run-9f6my2zq/checkpoint-1062/training_args.bin b/run-9f6my2zq/checkpoint-1062/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea7f26cf5f7a7492938260b1a65447a9e5575a1a --- /dev/null +++ b/run-9f6my2zq/checkpoint-1062/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:555c4f0f6d6aa077c8365edeb377e64c98e8e4f092668d34c4ce233b18fadc9a +size 4792 diff --git a/run-9f6my2zq/checkpoint-1260/model.safetensors b/run-9f6my2zq/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24455220256352bfa209c9869e3b462b3db83bd6 --- /dev/null +++ b/run-9f6my2zq/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a961be7e8d925839fffdd4278abf7013c43ea64c4a2661615005491a93e58a1 +size 198025308 diff --git a/run-9f6my2zq/checkpoint-1260/optimizer.pt b/run-9f6my2zq/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..731f97fb159165f1fb3b5a1db467814abe8fc52f --- /dev/null +++ b/run-9f6my2zq/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:844f98250d31de7216555208f63dc8d89e51da7ae5b2ad92da333d5fc469a21b +size 395900602 diff --git a/run-9f6my2zq/checkpoint-1260/rng_state.pth b/run-9f6my2zq/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-9f6my2zq/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-9f6my2zq/checkpoint-1260/scheduler.pt b/run-9f6my2zq/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc259fce363ddcd57141ed382654b62db1a75d69 --- /dev/null +++ b/run-9f6my2zq/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c931f729f3f06a7641e14ff0df1992633d534a3f2228af5223559b3acbe3acf1 +size 1064 diff --git a/run-9f6my2zq/checkpoint-1260/trainer_state.json b/run-9f6my2zq/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ee435fb9e2bbb49fd43b0b1600cb8cc6a9b1506d --- /dev/null +++ b/run-9f6my2zq/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9176829268292683, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-9f6my2zq/checkpoint-1062", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 8.071277102109182e-05, + "loss": 1.1972, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8913525498891353, + "eval_loss": 0.9123555421829224, + "eval_runtime": 6.7872, + "eval_samples_per_second": 531.592, + "eval_steps_per_second": 8.398, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00016142554204218363, + "loss": 0.8624, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00024213831306327545, + "loss": 0.8121, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8910753880266076, + "eval_loss": 0.8467855453491211, + "eval_runtime": 7.1147, + "eval_samples_per_second": 507.12, + "eval_steps_per_second": 8.012, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00032285108408436727, + "loss": 0.8046, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8963414634146342, + "eval_loss": 0.8375678062438965, + "eval_runtime": 7.2539, + "eval_samples_per_second": 497.386, + "eval_steps_per_second": 7.858, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004035638551054591, + "loss": 0.8057, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004842766261265509, + "loss": 0.7958, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8738913525498891, + "eval_loss": 0.8844189643859863, + "eval_runtime": 7.1219, + "eval_samples_per_second": 506.606, + "eval_steps_per_second": 8.003, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0005649893971476427, + "loss": 0.8044, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0006457021681687345, + "loss": 0.8071, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8813747228381374, + "eval_loss": 0.8625046014785767, + "eval_runtime": 6.76, + "eval_samples_per_second": 533.725, + "eval_steps_per_second": 8.432, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0007264149391898264, + "loss": 0.8068, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.17710643015521063, + "eval_loss": 1.831622838973999, + "eval_runtime": 7.0933, + "eval_samples_per_second": 508.647, + "eval_steps_per_second": 8.036, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0007758966338729712, + "loss": 0.8158, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0007736540524573757, + "loss": 0.811, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8658536585365854, + "eval_loss": 0.8754892349243164, + "eval_runtime": 6.921, + "eval_samples_per_second": 521.309, + "eval_steps_per_second": 8.236, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0007688907929539729, + "loss": 0.8261, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0007616379919650101, + "loss": 0.8114, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.7599778270509978, + "eval_loss": 1.017700433731079, + "eval_runtime": 7.0915, + "eval_samples_per_second": 508.776, + "eval_steps_per_second": 8.038, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0007519430597941702, + "loss": 0.808, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8594789356984479, + "eval_loss": 0.8994251489639282, + "eval_runtime": 6.7549, + "eval_samples_per_second": 534.127, + "eval_steps_per_second": 8.438, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0007398693705336277, + "loss": 0.8152, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0007254958477985876, + "loss": 0.8069, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8894124168514412, + "eval_loss": 0.8574639558792114, + "eval_runtime": 6.9472, + "eval_samples_per_second": 519.349, + "eval_steps_per_second": 8.205, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0007089164488172834, + "loss": 0.7926, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8467294900221729, + "eval_loss": 0.9047557711601257, + "eval_runtime": 7.0907, + "eval_samples_per_second": 508.836, + "eval_steps_per_second": 8.039, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0006902395502488533, + "loss": 0.7941, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.000669587239743892, + "loss": 0.7921, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8705654101995566, + "eval_loss": 0.8906881809234619, + "eval_runtime": 7.2776, + "eval_samples_per_second": 495.771, + "eval_steps_per_second": 7.832, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0006470945178786287, + "loss": 0.7968, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0006229084156795452, + "loss": 0.7877, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8830376940133038, + "eval_loss": 0.8558828830718994, + "eval_runtime": 6.7821, + "eval_samples_per_second": 531.985, + "eval_steps_per_second": 8.404, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.000597187033507026, + "loss": 0.7836, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8738913525498891, + "eval_loss": 0.8972464203834534, + "eval_runtime": 6.9952, + "eval_samples_per_second": 515.782, + "eval_steps_per_second": 8.148, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0005700985075806904, + "loss": 0.7845, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0005418199109020485, + "loss": 0.7664, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8672394678492239, + "eval_loss": 0.8879624605178833, + "eval_runtime": 6.931, + "eval_samples_per_second": 520.561, + "eval_steps_per_second": 8.224, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0005125360957589668, + "loss": 0.7748, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0004824384853782824, + "loss": 0.7648, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9024390243902439, + "eval_loss": 0.8267466425895691, + "eval_runtime": 7.0365, + "eval_samples_per_second": 512.752, + "eval_steps_per_second": 8.101, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00045172382262532876, + "loss": 0.761, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8260979056358337, + "eval_runtime": 7.0901, + "eval_samples_per_second": 508.88, + "eval_steps_per_second": 8.039, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00042059288392990693, + "loss": 0.7549, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0003892491668455435, + "loss": 0.7458, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.82014399766922, + "eval_runtime": 6.7907, + "eval_samples_per_second": 531.317, + "eval_steps_per_second": 8.394, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00035789755982123454, + "loss": 0.742, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00032674300288114687, + "loss": 0.7407, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.8205455541610718, + "eval_runtime": 7.0052, + "eval_samples_per_second": 515.048, + "eval_steps_per_second": 8.137, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00029598914796718123, + "loss": 0.7435, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.826063871383667, + "eval_runtime": 6.7289, + "eval_samples_per_second": 536.198, + "eval_steps_per_second": 8.471, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00026583702770150553, + "loss": 0.7259, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00023648374127112947, + "loss": 0.7209, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.823644757270813, + "eval_runtime": 6.978, + "eval_samples_per_second": 517.053, + "eval_steps_per_second": 8.169, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00020812116602466057, + "loss": 0.7183, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8273306488990784, + "eval_runtime": 7.0589, + "eval_samples_per_second": 511.129, + "eval_steps_per_second": 8.075, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.00018093470320331573, + "loss": 0.7227, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.0001551020660051228, + "loss": 0.7146, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8222212195396423, + "eval_runtime": 6.9661, + "eval_samples_per_second": 517.939, + "eval_steps_per_second": 8.183, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.00013079211790453124, + "loss": 0.7165, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.00010816376882113006, + "loss": 0.7093, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.824830174446106, + "eval_runtime": 6.9408, + "eval_samples_per_second": 519.822, + "eval_steps_per_second": 8.212, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 8.7364936353032e-05, + "loss": 0.7026, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8111318945884705, + "eval_runtime": 6.9516, + "eval_samples_per_second": 519.018, + "eval_steps_per_second": 8.2, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 6.85315788651575e-05, + "loss": 0.7029, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 5.178680675295498e-05, + "loss": 0.7002, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9113082039911308, + "eval_loss": 0.8215121030807495, + "eval_runtime": 7.0412, + "eval_samples_per_second": 512.413, + "eval_steps_per_second": 8.095, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 3.72400776910718e-05, + "loss": 0.7011, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 2.4986481127496355e-05, + "loss": 0.706, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.8156242966651917, + "eval_runtime": 6.9837, + "eval_samples_per_second": 516.634, + "eval_steps_per_second": 8.162, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.5106116700308568e-05, + "loss": 0.7001, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9118625277161863, + "eval_loss": 0.8201037049293518, + "eval_runtime": 7.0854, + "eval_samples_per_second": 509.213, + "eval_steps_per_second": 8.045, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 7.663570640220999e-06, + "loss": 0.6953, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 2.707493581577035e-06, + "loss": 0.6957, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.812532365322113, + "eval_runtime": 7.1511, + "eval_samples_per_second": 504.538, + "eval_steps_per_second": 7.971, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 2.702825415816904e-07, + "loss": 0.6992, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.81263667345047, + "eval_runtime": 6.8246, + "eval_samples_per_second": 528.672, + "eval_steps_per_second": 8.352, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0007760843367412675, + "metric": "eval/loss", + "warmup_ratio": 0.19813274422561625 + } +} diff --git a/run-9f6my2zq/checkpoint-1260/training_args.bin b/run-9f6my2zq/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea7f26cf5f7a7492938260b1a65447a9e5575a1a --- /dev/null +++ b/run-9f6my2zq/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:555c4f0f6d6aa077c8365edeb377e64c98e8e4f092668d34c4ce233b18fadc9a +size 4792 diff --git a/run-a5wwzr2f/checkpoint-510/model.safetensors b/run-a5wwzr2f/checkpoint-510/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78fae4f83153c9e74903c0e8d19982d69482fbb0 --- /dev/null +++ b/run-a5wwzr2f/checkpoint-510/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2586777fa892d136ae1f269a93190356e04cbaf1b3ea9d23c19744ef32003cd2 +size 198025308 diff --git a/run-a5wwzr2f/checkpoint-510/optimizer.pt b/run-a5wwzr2f/checkpoint-510/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1597e282da47e7ab5f26a39b6340e13f3b902409 --- /dev/null +++ b/run-a5wwzr2f/checkpoint-510/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11ebc079caba7784b5004f06e5ddc6fb0d788ea9bea40270d756a7fdd6f391f5 +size 395900602 diff --git a/run-a5wwzr2f/checkpoint-510/rng_state.pth b/run-a5wwzr2f/checkpoint-510/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1d8dc24231ef2178e0e7f1fb8387b9f8514188b8 --- /dev/null +++ b/run-a5wwzr2f/checkpoint-510/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff11e63c65ca788e4e4341e1970557dcb3db9d0cb5075b86ffbecfbd1dc05a1a +size 14244 diff --git a/run-a5wwzr2f/checkpoint-510/scheduler.pt b/run-a5wwzr2f/checkpoint-510/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..583375b333d1ce301d20ecee9d3ff507e764b911 --- /dev/null +++ b/run-a5wwzr2f/checkpoint-510/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:101dc6ee50a30b38cd56c9c63db89577e47e3373e59813504f495f34ca571996 +size 1064 diff --git a/run-a5wwzr2f/checkpoint-510/trainer_state.json b/run-a5wwzr2f/checkpoint-510/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d38cc97ae335901aa8cf9473cdccdc0d9db28829 --- /dev/null +++ b/run-a5wwzr2f/checkpoint-510/trainer_state.json @@ -0,0 +1,550 @@ +{ + "best_metric": 0.9165106689304948, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-a5wwzr2f/checkpoint-510", + "epoch": 24.0, + "eval_steps": 500, + "global_step": 510, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.4726121162994222e-05, + "loss": 1.4768, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8281596452328159, + "eval_f1": 0.7509082904901826, + "eval_loss": 1.0491864681243896, + "eval_precision": 0.6868393950057955, + "eval_recall": 0.8281596452328159, + "eval_runtime": 8.2319, + "eval_samples_per_second": 438.295, + "eval_steps_per_second": 3.523, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 2.9452242325988443e-05, + "loss": 1.1434, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 4.417836348898267e-05, + "loss": 0.9352, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8691796008869179, + "eval_f1": 0.8385909184369956, + "eval_loss": 0.9232441782951355, + "eval_precision": 0.8579464481333148, + "eval_recall": 0.8691796008869179, + "eval_runtime": 7.8323, + "eval_samples_per_second": 460.658, + "eval_steps_per_second": 3.703, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 5.890448465197689e-05, + "loss": 0.8683, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9018847006651884, + "eval_f1": 0.8943908053067023, + "eval_loss": 0.8412584066390991, + "eval_precision": 0.892548860616585, + "eval_recall": 0.9018847006651884, + "eval_runtime": 8.1688, + "eval_samples_per_second": 441.68, + "eval_steps_per_second": 3.55, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 7.363060581497112e-05, + "loss": 0.8363, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 8.835672697796534e-05, + "loss": 0.8016, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9078817588705844, + "eval_loss": 0.8185250163078308, + "eval_precision": 0.9049279948036644, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.9349, + "eval_samples_per_second": 454.703, + "eval_steps_per_second": 3.655, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00010308284814095956, + "loss": 0.7949, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00011780896930395377, + "loss": 0.7821, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8949556541019955, + "eval_f1": 0.8938492092849512, + "eval_loss": 0.8351148366928101, + "eval_precision": 0.8975733581219193, + "eval_recall": 0.8949556541019955, + "eval_runtime": 8.0242, + "eval_samples_per_second": 449.642, + "eval_steps_per_second": 3.614, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.000132535090466948, + "loss": 0.7657, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9105593483068322, + "eval_loss": 0.8034533262252808, + "eval_precision": 0.9087604323479832, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.8777, + "eval_samples_per_second": 458.002, + "eval_steps_per_second": 3.681, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0001427079158842914, + "loss": 0.7673, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00014232979736028458, + "loss": 0.7591, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9058474110886262, + "eval_loss": 0.8168977499008179, + "eval_precision": 0.902959529072294, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.0103, + "eval_samples_per_second": 450.417, + "eval_steps_per_second": 3.62, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00014148595293899874, + "loss": 0.7549, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00014018192058410953, + "loss": 0.7516, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9013303769401331, + "eval_f1": 0.8986305379904627, + "eval_loss": 0.8208667635917664, + "eval_precision": 0.8973935531664374, + "eval_recall": 0.9013303769401331, + "eval_runtime": 8.1679, + "eval_samples_per_second": 441.727, + "eval_steps_per_second": 3.55, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00013842625837057868, + "loss": 0.7459, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9033368222752016, + "eval_loss": 0.8189777135848999, + "eval_precision": 0.9059115713647182, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.0115, + "eval_samples_per_second": 450.355, + "eval_steps_per_second": 3.62, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00013623048831990707, + "loss": 0.7396, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0001336090207836644, + "loss": 0.7347, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9080584060972624, + "eval_loss": 0.8075916767120361, + "eval_precision": 0.9061560076771377, + "eval_recall": 0.9121396895787139, + "eval_runtime": 8.2946, + "eval_samples_per_second": 434.983, + "eval_steps_per_second": 3.496, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00013057905987154988, + "loss": 0.7292, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8181818181818182, + "eval_f1": 0.8373816185150018, + "eval_loss": 0.9502954483032227, + "eval_precision": 0.8859271226326274, + "eval_recall": 0.8181818181818182, + "eval_runtime": 8.0448, + "eval_samples_per_second": 448.491, + "eval_steps_per_second": 3.605, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00012716049054463808, + "loss": 0.7276, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00012337574811479202, + "loss": 0.7216, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9120543221257181, + "eval_loss": 0.8060290813446045, + "eval_precision": 0.9100273459538037, + "eval_recall": 0.9176829268292683, + "eval_runtime": 8.119, + "eval_samples_per_second": 444.39, + "eval_steps_per_second": 3.572, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00011924967100669027, + "loss": 0.7242, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0001148093377487578, + "loss": 0.7158, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9097557214831761, + "eval_loss": 0.8052047491073608, + "eval_precision": 0.9083949770168024, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.0088, + "eval_samples_per_second": 450.503, + "eval_steps_per_second": 3.621, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00011008388926279289, + "loss": 0.717, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9075529581361468, + "eval_loss": 0.8126544952392578, + "eval_precision": 0.9055469844521028, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.1861, + "eval_samples_per_second": 440.749, + "eval_steps_per_second": 3.543, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00010510433761856429, + "loss": 0.7109, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 9.990336250847928e-05, + "loss": 0.7116, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9102244986171015, + "eval_loss": 0.805758535861969, + "eval_precision": 0.9112224737471255, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.0419, + "eval_samples_per_second": 448.651, + "eval_steps_per_second": 3.606, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 9.451509677801462e-05, + "loss": 0.7105, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 8.8974902419426e-05, + "loss": 0.712, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.8979366092493479, + "eval_loss": 0.8234587907791138, + "eval_precision": 0.9015325660172828, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.9619, + "eval_samples_per_second": 453.158, + "eval_steps_per_second": 3.642, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 8.331913849884048e-05, + "loss": 0.7064, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.866130820399113, + "eval_f1": 0.8760850360746498, + "eval_loss": 0.8916295766830444, + "eval_precision": 0.8996021067455724, + "eval_recall": 0.866130820399113, + "eval_runtime": 8.0046, + "eval_samples_per_second": 450.743, + "eval_steps_per_second": 3.623, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 7.75849225397739e-05, + "loss": 0.7043, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 7.1809886929061e-05, + "loss": 0.7015, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9134730354540397, + "eval_loss": 0.8052300214767456, + "eval_precision": 0.9100572523790074, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.8165, + "eval_samples_per_second": 461.589, + "eval_steps_per_second": 3.71, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 6.603193194385187e-05, + "loss": 0.7034, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 6.0288977020503566e-05, + "loss": 0.6985, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9093777831019124, + "eval_loss": 0.8106514811515808, + "eval_precision": 0.9069923224147598, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.99, + "eval_samples_per_second": 451.565, + "eval_steps_per_second": 3.63, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 5.461871189773414e-05, + "loss": 0.7031, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.895509977827051, + "eval_f1": 0.8971580055201684, + "eval_loss": 0.8329421281814575, + "eval_precision": 0.9028037122419748, + "eval_recall": 0.895509977827051, + "eval_runtime": 8.0005, + "eval_samples_per_second": 450.971, + "eval_steps_per_second": 3.625, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.905834926723131e-05, + "loss": 0.6985, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 4.364438055501506e-05, + "loss": 0.6929, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9136328085287295, + "eval_loss": 0.8111518025398254, + "eval_precision": 0.9117389264308502, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.8395, + "eval_samples_per_second": 460.232, + "eval_steps_per_second": 3.699, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.841233643630655e-05, + "loss": 0.6978, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9137637210844304, + "eval_loss": 0.8092953562736511, + "eval_precision": 0.9128445384997131, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.8334, + "eval_samples_per_second": 460.593, + "eval_steps_per_second": 3.702, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 3.339655365559137e-05, + "loss": 0.6963, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.8629949682185386e-05, + "loss": 0.6931, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9078084819843948, + "eval_loss": 0.8109001517295837, + "eval_precision": 0.9052864325756059, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.8359, + "eval_samples_per_second": 460.447, + "eval_steps_per_second": 3.701, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.4143806680188467e-05, + "loss": 0.694, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.9967566210583333e-05, + "loss": 0.6942, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9165106689304948, + "eval_loss": 0.8015699982643127, + "eval_precision": 0.914978831134833, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.8648, + "eval_samples_per_second": 458.754, + "eval_steps_per_second": 3.687, + "step": 510 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.1246737886459417, + "learning_rate": 0.000142730097425944, + "metric": "eval/loss", + "weight_decay": 0.14649431105245306 + } +} diff --git a/run-a5wwzr2f/checkpoint-510/training_args.bin b/run-a5wwzr2f/checkpoint-510/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..268a1f9a3b59914f6c08a4bf3a07d1639ec9ec97 --- /dev/null +++ b/run-a5wwzr2f/checkpoint-510/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a43a62fce0e38fa961c5af2dff05085ff1b807e8aadd11600cf92f5caa056a9 +size 4792 diff --git a/run-a5wwzr2f/checkpoint-630/model.safetensors b/run-a5wwzr2f/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98dcc179bf8d569965b63abd35a50bbf02934a43 --- /dev/null +++ b/run-a5wwzr2f/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7142808463b13e48dc4d5ad67a13ddcf43736957b5fb0d18a392777a725d20ad +size 198025308 diff --git a/run-a5wwzr2f/checkpoint-630/optimizer.pt b/run-a5wwzr2f/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b749eff64d39c7745acb9bc36b33c7aeef377a9e --- /dev/null +++ b/run-a5wwzr2f/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd761bd99575f4012c12b3e1fbd71520ffee98b7dff469abe6472f82501ec645 +size 395900602 diff --git a/run-a5wwzr2f/checkpoint-630/rng_state.pth b/run-a5wwzr2f/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-a5wwzr2f/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-a5wwzr2f/checkpoint-630/scheduler.pt b/run-a5wwzr2f/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..42ef491114436b90e4bc0d9f2b0cde5f6b3fa42a --- /dev/null +++ b/run-a5wwzr2f/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a885957eceb6b8a276840953e72b80b4c1da403baff4650cea590bbf3611b12a +size 1064 diff --git a/run-a5wwzr2f/checkpoint-630/trainer_state.json b/run-a5wwzr2f/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d6bf29380b7bc90697d5fe16aeaa31dc52f6171d --- /dev/null +++ b/run-a5wwzr2f/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9165106689304948, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-a5wwzr2f/checkpoint-510", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.4726121162994222e-05, + "loss": 1.4768, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8281596452328159, + "eval_f1": 0.7509082904901826, + "eval_loss": 1.0491864681243896, + "eval_precision": 0.6868393950057955, + "eval_recall": 0.8281596452328159, + "eval_runtime": 8.2319, + "eval_samples_per_second": 438.295, + "eval_steps_per_second": 3.523, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 2.9452242325988443e-05, + "loss": 1.1434, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 4.417836348898267e-05, + "loss": 0.9352, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8691796008869179, + "eval_f1": 0.8385909184369956, + "eval_loss": 0.9232441782951355, + "eval_precision": 0.8579464481333148, + "eval_recall": 0.8691796008869179, + "eval_runtime": 7.8323, + "eval_samples_per_second": 460.658, + "eval_steps_per_second": 3.703, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 5.890448465197689e-05, + "loss": 0.8683, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9018847006651884, + "eval_f1": 0.8943908053067023, + "eval_loss": 0.8412584066390991, + "eval_precision": 0.892548860616585, + "eval_recall": 0.9018847006651884, + "eval_runtime": 8.1688, + "eval_samples_per_second": 441.68, + "eval_steps_per_second": 3.55, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 7.363060581497112e-05, + "loss": 0.8363, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 8.835672697796534e-05, + "loss": 0.8016, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9078817588705844, + "eval_loss": 0.8185250163078308, + "eval_precision": 0.9049279948036644, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.9349, + "eval_samples_per_second": 454.703, + "eval_steps_per_second": 3.655, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00010308284814095956, + "loss": 0.7949, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00011780896930395377, + "loss": 0.7821, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8949556541019955, + "eval_f1": 0.8938492092849512, + "eval_loss": 0.8351148366928101, + "eval_precision": 0.8975733581219193, + "eval_recall": 0.8949556541019955, + "eval_runtime": 8.0242, + "eval_samples_per_second": 449.642, + "eval_steps_per_second": 3.614, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.000132535090466948, + "loss": 0.7657, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9105593483068322, + "eval_loss": 0.8034533262252808, + "eval_precision": 0.9087604323479832, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.8777, + "eval_samples_per_second": 458.002, + "eval_steps_per_second": 3.681, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0001427079158842914, + "loss": 0.7673, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00014232979736028458, + "loss": 0.7591, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9058474110886262, + "eval_loss": 0.8168977499008179, + "eval_precision": 0.902959529072294, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.0103, + "eval_samples_per_second": 450.417, + "eval_steps_per_second": 3.62, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00014148595293899874, + "loss": 0.7549, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00014018192058410953, + "loss": 0.7516, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9013303769401331, + "eval_f1": 0.8986305379904627, + "eval_loss": 0.8208667635917664, + "eval_precision": 0.8973935531664374, + "eval_recall": 0.9013303769401331, + "eval_runtime": 8.1679, + "eval_samples_per_second": 441.727, + "eval_steps_per_second": 3.55, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00013842625837057868, + "loss": 0.7459, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9033368222752016, + "eval_loss": 0.8189777135848999, + "eval_precision": 0.9059115713647182, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.0115, + "eval_samples_per_second": 450.355, + "eval_steps_per_second": 3.62, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00013623048831990707, + "loss": 0.7396, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0001336090207836644, + "loss": 0.7347, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9080584060972624, + "eval_loss": 0.8075916767120361, + "eval_precision": 0.9061560076771377, + "eval_recall": 0.9121396895787139, + "eval_runtime": 8.2946, + "eval_samples_per_second": 434.983, + "eval_steps_per_second": 3.496, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00013057905987154988, + "loss": 0.7292, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8181818181818182, + "eval_f1": 0.8373816185150018, + "eval_loss": 0.9502954483032227, + "eval_precision": 0.8859271226326274, + "eval_recall": 0.8181818181818182, + "eval_runtime": 8.0448, + "eval_samples_per_second": 448.491, + "eval_steps_per_second": 3.605, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00012716049054463808, + "loss": 0.7276, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00012337574811479202, + "loss": 0.7216, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9120543221257181, + "eval_loss": 0.8060290813446045, + "eval_precision": 0.9100273459538037, + "eval_recall": 0.9176829268292683, + "eval_runtime": 8.119, + "eval_samples_per_second": 444.39, + "eval_steps_per_second": 3.572, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00011924967100669027, + "loss": 0.7242, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0001148093377487578, + "loss": 0.7158, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9097557214831761, + "eval_loss": 0.8052047491073608, + "eval_precision": 0.9083949770168024, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.0088, + "eval_samples_per_second": 450.503, + "eval_steps_per_second": 3.621, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00011008388926279289, + "loss": 0.717, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9075529581361468, + "eval_loss": 0.8126544952392578, + "eval_precision": 0.9055469844521028, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.1861, + "eval_samples_per_second": 440.749, + "eval_steps_per_second": 3.543, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00010510433761856429, + "loss": 0.7109, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 9.990336250847928e-05, + "loss": 0.7116, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9102244986171015, + "eval_loss": 0.805758535861969, + "eval_precision": 0.9112224737471255, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.0419, + "eval_samples_per_second": 448.651, + "eval_steps_per_second": 3.606, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 9.451509677801462e-05, + "loss": 0.7105, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 8.8974902419426e-05, + "loss": 0.712, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.8979366092493479, + "eval_loss": 0.8234587907791138, + "eval_precision": 0.9015325660172828, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.9619, + "eval_samples_per_second": 453.158, + "eval_steps_per_second": 3.642, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 8.331913849884048e-05, + "loss": 0.7064, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.866130820399113, + "eval_f1": 0.8760850360746498, + "eval_loss": 0.8916295766830444, + "eval_precision": 0.8996021067455724, + "eval_recall": 0.866130820399113, + "eval_runtime": 8.0046, + "eval_samples_per_second": 450.743, + "eval_steps_per_second": 3.623, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 7.75849225397739e-05, + "loss": 0.7043, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 7.1809886929061e-05, + "loss": 0.7015, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9134730354540397, + "eval_loss": 0.8052300214767456, + "eval_precision": 0.9100572523790074, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.8165, + "eval_samples_per_second": 461.589, + "eval_steps_per_second": 3.71, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 6.603193194385187e-05, + "loss": 0.7034, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 6.0288977020503566e-05, + "loss": 0.6985, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9093777831019124, + "eval_loss": 0.8106514811515808, + "eval_precision": 0.9069923224147598, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.99, + "eval_samples_per_second": 451.565, + "eval_steps_per_second": 3.63, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 5.461871189773414e-05, + "loss": 0.7031, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.895509977827051, + "eval_f1": 0.8971580055201684, + "eval_loss": 0.8329421281814575, + "eval_precision": 0.9028037122419748, + "eval_recall": 0.895509977827051, + "eval_runtime": 8.0005, + "eval_samples_per_second": 450.971, + "eval_steps_per_second": 3.625, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.905834926723131e-05, + "loss": 0.6985, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 4.364438055501506e-05, + "loss": 0.6929, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9136328085287295, + "eval_loss": 0.8111518025398254, + "eval_precision": 0.9117389264308502, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.8395, + "eval_samples_per_second": 460.232, + "eval_steps_per_second": 3.699, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.841233643630655e-05, + "loss": 0.6978, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9137637210844304, + "eval_loss": 0.8092953562736511, + "eval_precision": 0.9128445384997131, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.8334, + "eval_samples_per_second": 460.593, + "eval_steps_per_second": 3.702, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 3.339655365559137e-05, + "loss": 0.6963, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.8629949682185386e-05, + "loss": 0.6931, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9078084819843948, + "eval_loss": 0.8109001517295837, + "eval_precision": 0.9052864325756059, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.8359, + "eval_samples_per_second": 460.447, + "eval_steps_per_second": 3.701, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.4143806680188467e-05, + "loss": 0.694, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.9967566210583333e-05, + "loss": 0.6942, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9165106689304948, + "eval_loss": 0.8015699982643127, + "eval_precision": 0.914978831134833, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.8648, + "eval_samples_per_second": 458.754, + "eval_steps_per_second": 3.687, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.6128636012804507e-05, + "loss": 0.6927, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9149700291065989, + "eval_loss": 0.8070178031921387, + "eval_precision": 0.9141290631010339, + "eval_recall": 0.9207317073170732, + "eval_runtime": 8.1335, + "eval_samples_per_second": 443.595, + "eval_steps_per_second": 3.565, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.2652210133826464e-05, + "loss": 0.6913, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 9.561103585224286e-06, + "loss": 0.6908, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9134843536054005, + "eval_loss": 0.7987606525421143, + "eval_precision": 0.9117361482968372, + "eval_recall": 0.9176829268292683, + "eval_runtime": 8.0321, + "eval_samples_per_second": 449.197, + "eval_steps_per_second": 3.611, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 6.875602613315695e-06, + "loss": 0.6907, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 4.613331565028187e-06, + "loss": 0.6867, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9164151806644982, + "eval_loss": 0.8042768239974976, + "eval_precision": 0.914472777095423, + "eval_recall": 0.9196230598669624, + "eval_runtime": 7.7445, + "eval_samples_per_second": 465.88, + "eval_steps_per_second": 3.745, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.789137223221377e-06, + "loss": 0.6903, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9150692612327639, + "eval_loss": 0.8065941333770752, + "eval_precision": 0.9137512620929692, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.0957, + "eval_samples_per_second": 445.668, + "eval_steps_per_second": 3.582, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.4149913705457141e-06, + "loss": 0.6892, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 4.999122212896764e-07, + "loss": 0.6898, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9151806121755539, + "eval_loss": 0.800763726234436, + "eval_precision": 0.912190444740976, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.7904, + "eval_samples_per_second": 463.133, + "eval_steps_per_second": 3.723, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 4.990523684068138e-08, + "loss": 0.6917, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9162068501329884, + "eval_loss": 0.8043108582496643, + "eval_precision": 0.9143559502505612, + "eval_recall": 0.9210088691796009, + "eval_runtime": 8.0236, + "eval_samples_per_second": 449.674, + "eval_steps_per_second": 3.614, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.1246737886459417, + "learning_rate": 0.000142730097425944, + "metric": "eval/loss", + "weight_decay": 0.14649431105245306 + } +} diff --git a/run-a5wwzr2f/checkpoint-630/training_args.bin b/run-a5wwzr2f/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..268a1f9a3b59914f6c08a4bf3a07d1639ec9ec97 --- /dev/null +++ b/run-a5wwzr2f/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a43a62fce0e38fa961c5af2dff05085ff1b807e8aadd11600cf92f5caa056a9 +size 4792 diff --git a/run-ab5xbib7/checkpoint-616/model.safetensors b/run-ab5xbib7/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d0e9e768a7ea6f84922572f7621fc83fd4dcdf97 --- /dev/null +++ b/run-ab5xbib7/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c627f00d55c377392cfc6420229b61ad211ca46834ec847b3796853de83d4e0 +size 198025308 diff --git a/run-ab5xbib7/checkpoint-616/optimizer.pt b/run-ab5xbib7/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..552f861cc7cd0150a5326e18ccc5fc4764d5659b --- /dev/null +++ b/run-ab5xbib7/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bb84ebeb219ad7d019a1289911ffd3d9401b428395b9419f93e8f315c0c5921 +size 395900602 diff --git a/run-ab5xbib7/checkpoint-616/rng_state.pth b/run-ab5xbib7/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-ab5xbib7/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-ab5xbib7/checkpoint-616/scheduler.pt b/run-ab5xbib7/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2878abf9a3604eaac465598f9919a3e19e8fa9ed --- /dev/null +++ b/run-ab5xbib7/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:565c14445f2e8efe232ee2193374263a9a4e1be49aa171f1e2fc14ba0041d175 +size 1064 diff --git a/run-ab5xbib7/checkpoint-616/trainer_state.json b/run-ab5xbib7/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2248095617e9d8a50eaa6bf29a7e3dd9ffcba126 --- /dev/null +++ b/run-ab5xbib7/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9221413651835971, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-ab5xbib7/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.571402303866127e-05, + "loss": 1.4912, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7512330582238824, + "eval_loss": 1.0430140495300293, + "eval_precision": 0.7698841730891972, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.28, + "eval_samples_per_second": 435.75, + "eval_steps_per_second": 3.502, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.142804607732254e-05, + "loss": 1.1336, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 4.71420691159838e-05, + "loss": 0.9451, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.864190687361419, + "eval_f1": 0.8227326774412022, + "eval_loss": 1.0004956722259521, + "eval_precision": 0.8598343541949893, + "eval_recall": 0.864190687361419, + "eval_runtime": 7.8325, + "eval_samples_per_second": 460.645, + "eval_steps_per_second": 3.703, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 6.285609215464508e-05, + "loss": 0.8719, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9008792506460577, + "eval_loss": 0.8843601942062378, + "eval_precision": 0.8978937878287648, + "eval_recall": 0.9088137472283814, + "eval_runtime": 7.6228, + "eval_samples_per_second": 473.314, + "eval_steps_per_second": 3.804, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 7.857011519330634e-05, + "loss": 0.8316, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 9.42841382319676e-05, + "loss": 0.8035, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.899243119264921, + "eval_loss": 0.8161300420761108, + "eval_precision": 0.8968966544294303, + "eval_recall": 0.9090909090909091, + "eval_runtime": 7.363, + "eval_samples_per_second": 490.016, + "eval_steps_per_second": 3.939, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00010999816127062886, + "loss": 0.8012, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00011475308047363453, + "loss": 0.7785, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9074384662864232, + "eval_loss": 0.807421088218689, + "eval_precision": 0.90626361553853, + "eval_recall": 0.9104767184035477, + "eval_runtime": 8.1727, + "eval_samples_per_second": 441.467, + "eval_steps_per_second": 3.548, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011435479027855673, + "loss": 0.7799, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9069556075680414, + "eval_loss": 0.8092712759971619, + "eval_precision": 0.9050127330582727, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.2054, + "eval_samples_per_second": 439.709, + "eval_steps_per_second": 3.534, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0001136248566063744, + "loss": 0.7721, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00011256753104703402, + "loss": 0.7671, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9099115348465453, + "eval_loss": 0.7979322075843811, + "eval_precision": 0.9070238890185478, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.1761, + "eval_samples_per_second": 441.287, + "eval_steps_per_second": 3.547, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0001111889721254668, + "loss": 0.76, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001094972094304901, + "loss": 0.7524, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9057649667405765, + "eval_f1": 0.9038972823060315, + "eval_loss": 0.8151103258132935, + "eval_precision": 0.9029526616543452, + "eval_recall": 0.9057649667405765, + "eval_runtime": 8.0783, + "eval_samples_per_second": 446.63, + "eval_steps_per_second": 3.59, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00010750209684546337, + "loss": 0.7446, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9004988913525499, + "eval_f1": 0.9010752474604765, + "eval_loss": 0.8283939361572266, + "eval_precision": 0.9060755002001263, + "eval_recall": 0.9004988913525499, + "eval_runtime": 7.7969, + "eval_samples_per_second": 462.746, + "eval_steps_per_second": 3.719, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00010521525515311232, + "loss": 0.7478, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00010265000434882684, + "loss": 0.74, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9114792283749521, + "eval_loss": 0.7978630661964417, + "eval_precision": 0.9089188696777954, + "eval_recall": 0.9196230598669624, + "eval_runtime": 7.9764, + "eval_samples_per_second": 452.336, + "eval_steps_per_second": 3.636, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 9.982128605668238e-05, + "loss": 0.7351, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9077469683421542, + "eval_loss": 0.8077139258384705, + "eval_precision": 0.9055605821116257, + "eval_recall": 0.9115853658536586, + "eval_runtime": 7.8527, + "eval_samples_per_second": 459.459, + "eval_steps_per_second": 3.693, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 9.674557650008199e-05, + "loss": 0.7402, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 9.344079053393309e-05, + "loss": 0.7301, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8919068736141907, + "eval_f1": 0.8964669097872539, + "eval_loss": 0.8415220975875854, + "eval_precision": 0.9047448128877434, + "eval_recall": 0.8919068736141907, + "eval_runtime": 8.0164, + "eval_samples_per_second": 450.075, + "eval_steps_per_second": 3.618, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 8.992617729733482e-05, + "loss": 0.723, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 8.622220809456041e-05, + "loss": 0.7225, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.908096680237204, + "eval_loss": 0.8086981177330017, + "eval_precision": 0.9038741168218466, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.9182, + "eval_samples_per_second": 455.662, + "eval_steps_per_second": 3.662, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 8.235045715738559e-05, + "loss": 0.7213, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.8998774120456622, + "eval_loss": 0.8167815208435059, + "eval_precision": 0.9017448109088967, + "eval_recall": 0.9107538802660754, + "eval_runtime": 7.7249, + "eval_samples_per_second": 467.059, + "eval_steps_per_second": 3.754, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 7.833347598327815e-05, + "loss": 0.7213, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 7.419466198138072e-05, + "loss": 0.7198, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9103461261934056, + "eval_loss": 0.8095038533210754, + "eval_precision": 0.9096007807647974, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.4699, + "eval_samples_per_second": 483.008, + "eval_steps_per_second": 3.882, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 6.995812219137573e-05, + "loss": 0.7155, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 6.564853286901842e-05, + "loss": 0.7193, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8833148558758315, + "eval_f1": 0.8903010012972837, + "eval_loss": 0.8519492149353027, + "eval_precision": 0.9079347088736005, + "eval_recall": 0.8833148558758315, + "eval_runtime": 7.8841, + "eval_samples_per_second": 457.628, + "eval_steps_per_second": 3.678, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 6.129099575619938e-05, + "loss": 0.7092, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9179981415257992, + "eval_loss": 0.8013831377029419, + "eval_precision": 0.9159400380071565, + "eval_recall": 0.9207317073170732, + "eval_runtime": 7.9484, + "eval_samples_per_second": 453.926, + "eval_steps_per_second": 3.649, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 5.6910891872710425e-05, + "loss": 0.7115, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 5.253373368132082e-05, + "loss": 0.7072, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9140326222659831, + "eval_loss": 0.8027175664901733, + "eval_precision": 0.9134843006785892, + "eval_recall": 0.9207317073170732, + "eval_runtime": 8.0754, + "eval_samples_per_second": 446.79, + "eval_steps_per_second": 3.591, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 4.818501648724792e-05, + "loss": 0.7075, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 4.3890069937563846e-05, + "loss": 0.7061, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9091818868686966, + "eval_loss": 0.8095321655273438, + "eval_precision": 0.9060916845198314, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.0556, + "eval_samples_per_second": 447.886, + "eval_steps_per_second": 3.6, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 3.967391048549889e-05, + "loss": 0.703, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.895509977827051, + "eval_f1": 0.8975091933571794, + "eval_loss": 0.835809588432312, + "eval_precision": 0.9016962575730014, + "eval_recall": 0.895509977827051, + "eval_runtime": 7.875, + "eval_samples_per_second": 458.156, + "eval_steps_per_second": 3.683, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 3.556109567898145e-05, + "loss": 0.7023, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.157558112212951e-05, + "loss": 0.7072, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9145309978960433, + "eval_loss": 0.8057080507278442, + "eval_precision": 0.913917234249311, + "eval_recall": 0.9196230598669624, + "eval_runtime": 8.1832, + "eval_samples_per_second": 440.903, + "eval_steps_per_second": 3.544, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 2.774058094283885e-05, + "loss": 0.7019, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9105323358587971, + "eval_loss": 0.8095676898956299, + "eval_precision": 0.9075117748076269, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.5844, + "eval_samples_per_second": 475.713, + "eval_steps_per_second": 3.824, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.4078432579192463e-05, + "loss": 0.7009, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.061046667225927e-05, + "loss": 0.6995, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.917566540296287, + "eval_loss": 0.7996167540550232, + "eval_precision": 0.9170211840414066, + "eval_recall": 0.9223946784922394, + "eval_runtime": 7.5459, + "eval_samples_per_second": 478.138, + "eval_steps_per_second": 3.843, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.7356882823107923e-05, + "loss": 0.6973, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.4336631937704669e-05, + "loss": 0.6973, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9137727287787337, + "eval_loss": 0.8062389492988586, + "eval_precision": 0.9118676657919131, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.4043, + "eval_samples_per_second": 487.287, + "eval_steps_per_second": 3.917, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.1567305844991827e-05, + "loss": 0.7, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9137523566711168, + "eval_loss": 0.804232656955719, + "eval_precision": 0.9113681093766327, + "eval_recall": 0.9174057649667405, + "eval_runtime": 7.9174, + "eval_samples_per_second": 455.707, + "eval_steps_per_second": 3.663, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 9.06503483108032e-06, + "loss": 0.6926, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 6.844393686381373e-06, + "loss": 0.6964, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.91366682427474, + "eval_loss": 0.8031045794487, + "eval_precision": 0.910891880159428, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.5996, + "eval_samples_per_second": 474.761, + "eval_steps_per_second": 3.816, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 4.918316812917217e-06, + "loss": 0.6954, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.2980228862791383e-06, + "loss": 0.6958, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9105602581760454, + "eval_loss": 0.8108755946159363, + "eval_precision": 0.908454491977231, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.7341, + "eval_samples_per_second": 466.508, + "eval_steps_per_second": 3.75, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.9929495110487444e-06, + "loss": 0.6967, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9148073622399701, + "eval_loss": 0.8010573387145996, + "eval_precision": 0.912189812766469, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.7549, + "eval_samples_per_second": 465.253, + "eval_steps_per_second": 3.74, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.0106982502899423e-06, + "loss": 0.6978, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 3.5699034929434635e-07, + "loss": 0.6973, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9254434589800443, + "eval_f1": 0.9221413651835971, + "eval_loss": 0.796552300453186, + "eval_precision": 0.9199399361700525, + "eval_recall": 0.9254434589800443, + "eval_runtime": 8.2623, + "eval_samples_per_second": 436.685, + "eval_steps_per_second": 3.51, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4853654432632253, + "learning_rate": 0.00011483324528252464, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-ab5xbib7/checkpoint-616/training_args.bin b/run-ab5xbib7/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..92f5026467b1db6ff2ee2ae995f96bd54109cd29 --- /dev/null +++ b/run-ab5xbib7/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a0d039ac733935a5db5b5d62b53d5cd1a931c398d88f5b4dbaffe6feb4313c3 +size 4792 diff --git a/run-ab5xbib7/checkpoint-630/model.safetensors b/run-ab5xbib7/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29b2b06f3de1d59f78066838d72ea9b9a13e44ca --- /dev/null +++ b/run-ab5xbib7/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c00f51be0303a5c451ab0106da553ba2a562d56775434fae21674a65a5808dd +size 198025308 diff --git a/run-ab5xbib7/checkpoint-630/optimizer.pt b/run-ab5xbib7/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..76f470f35ac27cc156bdb12ead09cc53d052b52e --- /dev/null +++ b/run-ab5xbib7/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:267d40a9aadafe870774af068367579bde8bd476f9a1186af90fc803da3a6acd +size 395900602 diff --git a/run-ab5xbib7/checkpoint-630/rng_state.pth b/run-ab5xbib7/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-ab5xbib7/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-ab5xbib7/checkpoint-630/scheduler.pt b/run-ab5xbib7/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d814f38849994e0fc3a994cb65e6714d77b264c3 --- /dev/null +++ b/run-ab5xbib7/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0105bda08f4b2fcdc8299125704b6abbbe34b69428d050c811e76a5e4fc48472 +size 1064 diff --git a/run-ab5xbib7/checkpoint-630/trainer_state.json b/run-ab5xbib7/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..778105e11e10a084cd8f5c7db95b49905c6494d3 --- /dev/null +++ b/run-ab5xbib7/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9221413651835971, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-ab5xbib7/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.571402303866127e-05, + "loss": 1.4912, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7512330582238824, + "eval_loss": 1.0430140495300293, + "eval_precision": 0.7698841730891972, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.28, + "eval_samples_per_second": 435.75, + "eval_steps_per_second": 3.502, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.142804607732254e-05, + "loss": 1.1336, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 4.71420691159838e-05, + "loss": 0.9451, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.864190687361419, + "eval_f1": 0.8227326774412022, + "eval_loss": 1.0004956722259521, + "eval_precision": 0.8598343541949893, + "eval_recall": 0.864190687361419, + "eval_runtime": 7.8325, + "eval_samples_per_second": 460.645, + "eval_steps_per_second": 3.703, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 6.285609215464508e-05, + "loss": 0.8719, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9008792506460577, + "eval_loss": 0.8843601942062378, + "eval_precision": 0.8978937878287648, + "eval_recall": 0.9088137472283814, + "eval_runtime": 7.6228, + "eval_samples_per_second": 473.314, + "eval_steps_per_second": 3.804, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 7.857011519330634e-05, + "loss": 0.8316, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 9.42841382319676e-05, + "loss": 0.8035, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.899243119264921, + "eval_loss": 0.8161300420761108, + "eval_precision": 0.8968966544294303, + "eval_recall": 0.9090909090909091, + "eval_runtime": 7.363, + "eval_samples_per_second": 490.016, + "eval_steps_per_second": 3.939, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00010999816127062886, + "loss": 0.8012, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00011475308047363453, + "loss": 0.7785, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9074384662864232, + "eval_loss": 0.807421088218689, + "eval_precision": 0.90626361553853, + "eval_recall": 0.9104767184035477, + "eval_runtime": 8.1727, + "eval_samples_per_second": 441.467, + "eval_steps_per_second": 3.548, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011435479027855673, + "loss": 0.7799, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9069556075680414, + "eval_loss": 0.8092712759971619, + "eval_precision": 0.9050127330582727, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.2054, + "eval_samples_per_second": 439.709, + "eval_steps_per_second": 3.534, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0001136248566063744, + "loss": 0.7721, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00011256753104703402, + "loss": 0.7671, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9099115348465453, + "eval_loss": 0.7979322075843811, + "eval_precision": 0.9070238890185478, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.1761, + "eval_samples_per_second": 441.287, + "eval_steps_per_second": 3.547, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0001111889721254668, + "loss": 0.76, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001094972094304901, + "loss": 0.7524, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9057649667405765, + "eval_f1": 0.9038972823060315, + "eval_loss": 0.8151103258132935, + "eval_precision": 0.9029526616543452, + "eval_recall": 0.9057649667405765, + "eval_runtime": 8.0783, + "eval_samples_per_second": 446.63, + "eval_steps_per_second": 3.59, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00010750209684546337, + "loss": 0.7446, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9004988913525499, + "eval_f1": 0.9010752474604765, + "eval_loss": 0.8283939361572266, + "eval_precision": 0.9060755002001263, + "eval_recall": 0.9004988913525499, + "eval_runtime": 7.7969, + "eval_samples_per_second": 462.746, + "eval_steps_per_second": 3.719, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00010521525515311232, + "loss": 0.7478, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00010265000434882684, + "loss": 0.74, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9114792283749521, + "eval_loss": 0.7978630661964417, + "eval_precision": 0.9089188696777954, + "eval_recall": 0.9196230598669624, + "eval_runtime": 7.9764, + "eval_samples_per_second": 452.336, + "eval_steps_per_second": 3.636, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 9.982128605668238e-05, + "loss": 0.7351, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9077469683421542, + "eval_loss": 0.8077139258384705, + "eval_precision": 0.9055605821116257, + "eval_recall": 0.9115853658536586, + "eval_runtime": 7.8527, + "eval_samples_per_second": 459.459, + "eval_steps_per_second": 3.693, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 9.674557650008199e-05, + "loss": 0.7402, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 9.344079053393309e-05, + "loss": 0.7301, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8919068736141907, + "eval_f1": 0.8964669097872539, + "eval_loss": 0.8415220975875854, + "eval_precision": 0.9047448128877434, + "eval_recall": 0.8919068736141907, + "eval_runtime": 8.0164, + "eval_samples_per_second": 450.075, + "eval_steps_per_second": 3.618, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 8.992617729733482e-05, + "loss": 0.723, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 8.622220809456041e-05, + "loss": 0.7225, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.908096680237204, + "eval_loss": 0.8086981177330017, + "eval_precision": 0.9038741168218466, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.9182, + "eval_samples_per_second": 455.662, + "eval_steps_per_second": 3.662, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 8.235045715738559e-05, + "loss": 0.7213, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.8998774120456622, + "eval_loss": 0.8167815208435059, + "eval_precision": 0.9017448109088967, + "eval_recall": 0.9107538802660754, + "eval_runtime": 7.7249, + "eval_samples_per_second": 467.059, + "eval_steps_per_second": 3.754, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 7.833347598327815e-05, + "loss": 0.7213, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 7.419466198138072e-05, + "loss": 0.7198, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9103461261934056, + "eval_loss": 0.8095038533210754, + "eval_precision": 0.9096007807647974, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.4699, + "eval_samples_per_second": 483.008, + "eval_steps_per_second": 3.882, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 6.995812219137573e-05, + "loss": 0.7155, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 6.564853286901842e-05, + "loss": 0.7193, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8833148558758315, + "eval_f1": 0.8903010012972837, + "eval_loss": 0.8519492149353027, + "eval_precision": 0.9079347088736005, + "eval_recall": 0.8833148558758315, + "eval_runtime": 7.8841, + "eval_samples_per_second": 457.628, + "eval_steps_per_second": 3.678, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 6.129099575619938e-05, + "loss": 0.7092, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9179981415257992, + "eval_loss": 0.8013831377029419, + "eval_precision": 0.9159400380071565, + "eval_recall": 0.9207317073170732, + "eval_runtime": 7.9484, + "eval_samples_per_second": 453.926, + "eval_steps_per_second": 3.649, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 5.6910891872710425e-05, + "loss": 0.7115, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 5.253373368132082e-05, + "loss": 0.7072, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9140326222659831, + "eval_loss": 0.8027175664901733, + "eval_precision": 0.9134843006785892, + "eval_recall": 0.9207317073170732, + "eval_runtime": 8.0754, + "eval_samples_per_second": 446.79, + "eval_steps_per_second": 3.591, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 4.818501648724792e-05, + "loss": 0.7075, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 4.3890069937563846e-05, + "loss": 0.7061, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9091818868686966, + "eval_loss": 0.8095321655273438, + "eval_precision": 0.9060916845198314, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.0556, + "eval_samples_per_second": 447.886, + "eval_steps_per_second": 3.6, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 3.967391048549889e-05, + "loss": 0.703, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.895509977827051, + "eval_f1": 0.8975091933571794, + "eval_loss": 0.835809588432312, + "eval_precision": 0.9016962575730014, + "eval_recall": 0.895509977827051, + "eval_runtime": 7.875, + "eval_samples_per_second": 458.156, + "eval_steps_per_second": 3.683, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 3.556109567898145e-05, + "loss": 0.7023, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.157558112212951e-05, + "loss": 0.7072, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9145309978960433, + "eval_loss": 0.8057080507278442, + "eval_precision": 0.913917234249311, + "eval_recall": 0.9196230598669624, + "eval_runtime": 8.1832, + "eval_samples_per_second": 440.903, + "eval_steps_per_second": 3.544, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 2.774058094283885e-05, + "loss": 0.7019, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9105323358587971, + "eval_loss": 0.8095676898956299, + "eval_precision": 0.9075117748076269, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.5844, + "eval_samples_per_second": 475.713, + "eval_steps_per_second": 3.824, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.4078432579192463e-05, + "loss": 0.7009, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.061046667225927e-05, + "loss": 0.6995, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.917566540296287, + "eval_loss": 0.7996167540550232, + "eval_precision": 0.9170211840414066, + "eval_recall": 0.9223946784922394, + "eval_runtime": 7.5459, + "eval_samples_per_second": 478.138, + "eval_steps_per_second": 3.843, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.7356882823107923e-05, + "loss": 0.6973, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.4336631937704669e-05, + "loss": 0.6973, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9137727287787337, + "eval_loss": 0.8062389492988586, + "eval_precision": 0.9118676657919131, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.4043, + "eval_samples_per_second": 487.287, + "eval_steps_per_second": 3.917, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.1567305844991827e-05, + "loss": 0.7, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9137523566711168, + "eval_loss": 0.804232656955719, + "eval_precision": 0.9113681093766327, + "eval_recall": 0.9174057649667405, + "eval_runtime": 7.9174, + "eval_samples_per_second": 455.707, + "eval_steps_per_second": 3.663, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 9.06503483108032e-06, + "loss": 0.6926, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 6.844393686381373e-06, + "loss": 0.6964, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.91366682427474, + "eval_loss": 0.8031045794487, + "eval_precision": 0.910891880159428, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.5996, + "eval_samples_per_second": 474.761, + "eval_steps_per_second": 3.816, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 4.918316812917217e-06, + "loss": 0.6954, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.2980228862791383e-06, + "loss": 0.6958, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9105602581760454, + "eval_loss": 0.8108755946159363, + "eval_precision": 0.908454491977231, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.7341, + "eval_samples_per_second": 466.508, + "eval_steps_per_second": 3.75, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.9929495110487444e-06, + "loss": 0.6967, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9148073622399701, + "eval_loss": 0.8010573387145996, + "eval_precision": 0.912189812766469, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.7549, + "eval_samples_per_second": 465.253, + "eval_steps_per_second": 3.74, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.0106982502899423e-06, + "loss": 0.6978, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 3.5699034929434635e-07, + "loss": 0.6973, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9254434589800443, + "eval_f1": 0.9221413651835971, + "eval_loss": 0.796552300453186, + "eval_precision": 0.9199399361700525, + "eval_recall": 0.9254434589800443, + "eval_runtime": 8.2623, + "eval_samples_per_second": 436.685, + "eval_steps_per_second": 3.51, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 3.563341147270227e-08, + "loss": 0.6922, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9125393596978265, + "eval_loss": 0.8054535388946533, + "eval_precision": 0.9111123049586122, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.2864, + "eval_samples_per_second": 495.17, + "eval_steps_per_second": 3.98, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4853654432632253, + "learning_rate": 0.00011483324528252464, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-ab5xbib7/checkpoint-630/training_args.bin b/run-ab5xbib7/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..92f5026467b1db6ff2ee2ae995f96bd54109cd29 --- /dev/null +++ b/run-ab5xbib7/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a0d039ac733935a5db5b5d62b53d5cd1a931c398d88f5b4dbaffe6feb4313c3 +size 4792 diff --git a/run-acof4i3t/checkpoint-616/model.safetensors b/run-acof4i3t/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3bab1ddbc318e1f64b7626c0dd4627238dce38d6 --- /dev/null +++ b/run-acof4i3t/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:999394c75b67e1bd4b22e2c8b3388bd5a60ed82acfa0fa463b2da811b083ae29 +size 198025308 diff --git a/run-acof4i3t/checkpoint-616/optimizer.pt b/run-acof4i3t/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a9677e3367f34e4d5932ce8a6c136080239d3ef6 --- /dev/null +++ b/run-acof4i3t/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f5a1662927bcb660bd584ac75dcb19592f0962a49f6b36fd7cb8a3d6d5a3c60 +size 395900602 diff --git a/run-acof4i3t/checkpoint-616/rng_state.pth b/run-acof4i3t/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-acof4i3t/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-acof4i3t/checkpoint-616/scheduler.pt b/run-acof4i3t/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7300d591b1058c2c3e90a882cf3e27bf7bf4642e --- /dev/null +++ b/run-acof4i3t/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d32fbe5c88ed74df4663a0b06c436724b6f711a4443e963bb5f468f2f024cdef +size 1064 diff --git a/run-acof4i3t/checkpoint-616/trainer_state.json b/run-acof4i3t/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..098bc9b321887a1b8a520ab0c18d80ed6f508fe2 --- /dev/null +++ b/run-acof4i3t/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9218744662805249, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-acof4i3t/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.332081336180439e-05, + "loss": 1.3688, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9812324047088623, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.184, + "eval_samples_per_second": 440.862, + "eval_steps_per_second": 3.544, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 8.664162672360879e-05, + "loss": 0.9883, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00012996244008541317, + "loss": 0.8638, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.9007760532150776, + "eval_f1": 0.8874209694202322, + "eval_loss": 0.9336458444595337, + "eval_precision": 0.8877094497284251, + "eval_recall": 0.9007760532150776, + "eval_runtime": 7.9916, + "eval_samples_per_second": 451.471, + "eval_steps_per_second": 3.629, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017328325344721757, + "loss": 0.8078, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9111952437633513, + "eval_loss": 0.7998877167701721, + "eval_precision": 0.9085742834607444, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.8937, + "eval_samples_per_second": 457.072, + "eval_steps_per_second": 3.674, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.00021660406680902195, + "loss": 0.7931, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00025992488017082633, + "loss": 0.7759, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9032705099778271, + "eval_f1": 0.8914998911311324, + "eval_loss": 0.8200076818466187, + "eval_precision": 0.8969210963806538, + "eval_recall": 0.9032705099778271, + "eval_runtime": 7.8962, + "eval_samples_per_second": 456.926, + "eval_steps_per_second": 3.673, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00030324569353263074, + "loss": 0.7868, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00031635417420858986, + "loss": 0.7649, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9069203034282971, + "eval_loss": 0.8162654638290405, + "eval_precision": 0.9044167844360006, + "eval_recall": 0.9110310421286031, + "eval_runtime": 8.3891, + "eval_samples_per_second": 430.083, + "eval_steps_per_second": 3.457, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0003152561577959659, + "loss": 0.7729, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8614190687361419, + "eval_f1": 0.8677684719509549, + "eval_loss": 0.8950210809707642, + "eval_precision": 0.8906370624912869, + "eval_recall": 0.8614190687361419, + "eval_runtime": 8.1853, + "eval_samples_per_second": 440.793, + "eval_steps_per_second": 3.543, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0003132438583166212, + "loss": 0.7618, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0003103289966604968, + "loss": 0.7534, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.9010951690369824, + "eval_loss": 0.82647305727005, + "eval_precision": 0.9079991467446424, + "eval_recall": 0.9024390243902439, + "eval_runtime": 8.3308, + "eval_samples_per_second": 433.094, + "eval_steps_per_second": 3.481, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00030652855080379074, + "loss": 0.7463, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00030186465691861336, + "loss": 0.7372, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8478381374722838, + "eval_f1": 0.8635590930258699, + "eval_loss": 0.9075872302055359, + "eval_precision": 0.8971633502490409, + "eval_recall": 0.8478381374722838, + "eval_runtime": 7.8836, + "eval_samples_per_second": 457.661, + "eval_steps_per_second": 3.679, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0002963644804380847, + "loss": 0.7325, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.904379157427938, + "eval_f1": 0.9011642461264477, + "eval_loss": 0.8325739502906799, + "eval_precision": 0.9022742364410467, + "eval_recall": 0.904379157427938, + "eval_runtime": 8.031, + "eval_samples_per_second": 449.256, + "eval_steps_per_second": 3.611, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0002900600578278724, + "loss": 0.7354, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00028298810998579155, + "loss": 0.7332, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9031808248773262, + "eval_loss": 0.8125725388526917, + "eval_precision": 0.9003670553899056, + "eval_recall": 0.9088137472283814, + "eval_runtime": 7.2749, + "eval_samples_per_second": 495.952, + "eval_steps_per_second": 3.986, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0002751898283563437, + "loss": 0.7273, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.9053172320526279, + "eval_loss": 0.8211202621459961, + "eval_precision": 0.9063961058036959, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.0632, + "eval_samples_per_second": 447.463, + "eval_steps_per_second": 3.597, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0002667106350059975, + "loss": 0.7327, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00025759991805668315, + "loss": 0.719, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9054878048780488, + "eval_f1": 0.9035208066119936, + "eval_loss": 0.8288699388504028, + "eval_precision": 0.9032464189243595, + "eval_recall": 0.9054878048780488, + "eval_runtime": 7.9094, + "eval_samples_per_second": 456.164, + "eval_steps_per_second": 3.667, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0002479107440185005, + "loss": 0.7124, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0002376995486971952, + "loss": 0.7145, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9004988913525499, + "eval_f1": 0.8974454419066261, + "eval_loss": 0.8292616009712219, + "eval_precision": 0.8973082093928275, + "eval_recall": 0.9004988913525499, + "eval_runtime": 7.9372, + "eval_samples_per_second": 454.569, + "eval_steps_per_second": 3.654, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0002270258084767513, + "loss": 0.7118, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.902497626684123, + "eval_loss": 0.8220421671867371, + "eval_precision": 0.9044010330245825, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.9281, + "eval_samples_per_second": 455.09, + "eval_steps_per_second": 3.658, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00021595169389175602, + "loss": 0.7073, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00020454170750734658, + "loss": 0.7069, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9056654724686274, + "eval_loss": 0.8235394358634949, + "eval_precision": 0.9042915144530349, + "eval_recall": 0.9088137472283814, + "eval_runtime": 7.8365, + "eval_samples_per_second": 460.411, + "eval_steps_per_second": 3.701, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0001928623082159542, + "loss": 0.7063, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.0001809815241391751, + "loss": 0.7079, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9069201728077497, + "eval_loss": 0.8186624050140381, + "eval_precision": 0.903244901098, + "eval_recall": 0.9110310421286031, + "eval_runtime": 7.885, + "eval_samples_per_second": 457.577, + "eval_steps_per_second": 3.678, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0001689685563894695, + "loss": 0.7017, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.905210643015521, + "eval_f1": 0.9037345141494774, + "eval_loss": 0.8171346187591553, + "eval_precision": 0.9027453278381381, + "eval_recall": 0.905210643015521, + "eval_runtime": 7.8218, + "eval_samples_per_second": 461.275, + "eval_steps_per_second": 3.708, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00015689337599962926, + "loss": 0.7036, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00014482631636774794, + "loss": 0.6969, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.905217500677941, + "eval_loss": 0.8182144165039062, + "eval_precision": 0.903239742582158, + "eval_recall": 0.9138026607538803, + "eval_runtime": 8.0493, + "eval_samples_per_second": 448.235, + "eval_steps_per_second": 3.603, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00013283766359154891, + "loss": 0.6974, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00012099724707822041, + "loss": 0.6973, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9096452328159645, + "eval_f1": 0.9043774216686182, + "eval_loss": 0.8198089003562927, + "eval_precision": 0.9019819697367276, + "eval_recall": 0.9096452328159645, + "eval_runtime": 8.473, + "eval_samples_per_second": 425.825, + "eval_steps_per_second": 3.423, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.00010937403281430179, + "loss": 0.694, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.9063137850762539, + "eval_loss": 0.8276586532592773, + "eval_precision": 0.9062591531950064, + "eval_recall": 0.9065964523281597, + "eval_runtime": 8.0348, + "eval_samples_per_second": 449.047, + "eval_steps_per_second": 3.609, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 9.803572166467102e-05, + "loss": 0.695, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 8.704835504039208e-05, + "loss": 0.693, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9139430890329873, + "eval_loss": 0.8102039694786072, + "eval_precision": 0.9122430119641018, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.9092, + "eval_samples_per_second": 456.175, + "eval_steps_per_second": 3.667, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 7.647593023225772e-05, + "loss": 0.6913, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.912676448858518, + "eval_loss": 0.81104975938797, + "eval_precision": 0.9103593970093129, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.094, + "eval_samples_per_second": 445.764, + "eval_steps_per_second": 3.583, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 6.638002765056734e-05, + "loss": 0.69, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 5.681945214232673e-05, + "loss": 0.6906, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9138439115465644, + "eval_loss": 0.8099184632301331, + "eval_precision": 0.9100073126950153, + "eval_recall": 0.9190687361419069, + "eval_runtime": 7.7592, + "eval_samples_per_second": 464.996, + "eval_steps_per_second": 3.737, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 4.784989047506355e-05, + "loss": 0.686, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 3.952358698228685e-05, + "loss": 0.6881, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.912189288541431, + "eval_loss": 0.8164031505584717, + "eval_precision": 0.9108991477989876, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.6231, + "eval_samples_per_second": 473.3, + "eval_steps_per_second": 3.804, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 3.188903925983368e-05, + "loss": 0.6884, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9126317679479089, + "eval_loss": 0.8096389174461365, + "eval_precision": 0.9100287969011175, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.8599, + "eval_samples_per_second": 459.039, + "eval_steps_per_second": 3.69, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 2.499071568555766e-05, + "loss": 0.6851, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.88687964077021e-05, + "loss": 0.6868, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9167079498555535, + "eval_loss": 0.8074877858161926, + "eval_precision": 0.9151315411456494, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.049, + "eval_samples_per_second": 448.256, + "eval_steps_per_second": 3.603, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.3558939310602103e-05, + "loss": 0.6869, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 9.092072320878426e-06, + "loss": 0.6851, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.913368621625012, + "eval_loss": 0.8071135878562927, + "eval_precision": 0.9104082551170866, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.0697, + "eval_samples_per_second": 447.105, + "eval_steps_per_second": 3.594, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 5.494213263861758e-06, + "loss": 0.6845, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9143105338959607, + "eval_loss": 0.8059723973274231, + "eval_precision": 0.9109660514202674, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.0085, + "eval_samples_per_second": 450.519, + "eval_steps_per_second": 3.621, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 2.7863183195156486e-06, + "loss": 0.6852, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 9.841599605458681e-07, + "loss": 0.6866, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9254434589800443, + "eval_f1": 0.9218744662805249, + "eval_loss": 0.797498345375061, + "eval_precision": 0.9203264793734605, + "eval_recall": 0.9254434589800443, + "eval_runtime": 7.9877, + "eval_samples_per_second": 451.696, + "eval_steps_per_second": 3.631, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.22931029995038135, + "learning_rate": 0.0003165751745670321, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-acof4i3t/checkpoint-616/training_args.bin b/run-acof4i3t/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e9bf01e04021555dd59e3e700d7e5983219e23cc --- /dev/null +++ b/run-acof4i3t/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf2ff9b5cfeac1c1bc5b6936052ec79bc0e0efe4eb5bd49623fb30f1a9c81883 +size 4792 diff --git a/run-acof4i3t/checkpoint-630/model.safetensors b/run-acof4i3t/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5574d99abaead5bcbbeb23453b0df781247cdd2d --- /dev/null +++ b/run-acof4i3t/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b7445540ae955cabada4b65dbc20533b1fb3f01946d150f2380e9ffab5f2b44 +size 198025308 diff --git a/run-acof4i3t/checkpoint-630/optimizer.pt b/run-acof4i3t/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c2599f5b6c9f72cb2033ff418dff718319c57cc --- /dev/null +++ b/run-acof4i3t/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd1ebce2cf7d8a8bd9631e8dae562ab58c5ab149a7c2ec50f9d49d821ab7f9f3 +size 395900602 diff --git a/run-acof4i3t/checkpoint-630/rng_state.pth b/run-acof4i3t/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-acof4i3t/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-acof4i3t/checkpoint-630/scheduler.pt b/run-acof4i3t/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9dd94c769de48134f1448676a1536594e48edc65 --- /dev/null +++ b/run-acof4i3t/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd9eb5657ed0f959254535aa01fa440f1fe882beefd8b36327de0c7aecd7ca5b +size 1064 diff --git a/run-acof4i3t/checkpoint-630/trainer_state.json b/run-acof4i3t/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3ef9ee38ac1872e14a4352d422b0acb18750552e --- /dev/null +++ b/run-acof4i3t/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9218744662805249, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-acof4i3t/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.332081336180439e-05, + "loss": 1.3688, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9812324047088623, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.184, + "eval_samples_per_second": 440.862, + "eval_steps_per_second": 3.544, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 8.664162672360879e-05, + "loss": 0.9883, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00012996244008541317, + "loss": 0.8638, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.9007760532150776, + "eval_f1": 0.8874209694202322, + "eval_loss": 0.9336458444595337, + "eval_precision": 0.8877094497284251, + "eval_recall": 0.9007760532150776, + "eval_runtime": 7.9916, + "eval_samples_per_second": 451.471, + "eval_steps_per_second": 3.629, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017328325344721757, + "loss": 0.8078, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9111952437633513, + "eval_loss": 0.7998877167701721, + "eval_precision": 0.9085742834607444, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.8937, + "eval_samples_per_second": 457.072, + "eval_steps_per_second": 3.674, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.00021660406680902195, + "loss": 0.7931, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00025992488017082633, + "loss": 0.7759, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9032705099778271, + "eval_f1": 0.8914998911311324, + "eval_loss": 0.8200076818466187, + "eval_precision": 0.8969210963806538, + "eval_recall": 0.9032705099778271, + "eval_runtime": 7.8962, + "eval_samples_per_second": 456.926, + "eval_steps_per_second": 3.673, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00030324569353263074, + "loss": 0.7868, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00031635417420858986, + "loss": 0.7649, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9069203034282971, + "eval_loss": 0.8162654638290405, + "eval_precision": 0.9044167844360006, + "eval_recall": 0.9110310421286031, + "eval_runtime": 8.3891, + "eval_samples_per_second": 430.083, + "eval_steps_per_second": 3.457, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0003152561577959659, + "loss": 0.7729, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8614190687361419, + "eval_f1": 0.8677684719509549, + "eval_loss": 0.8950210809707642, + "eval_precision": 0.8906370624912869, + "eval_recall": 0.8614190687361419, + "eval_runtime": 8.1853, + "eval_samples_per_second": 440.793, + "eval_steps_per_second": 3.543, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0003132438583166212, + "loss": 0.7618, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0003103289966604968, + "loss": 0.7534, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.9010951690369824, + "eval_loss": 0.82647305727005, + "eval_precision": 0.9079991467446424, + "eval_recall": 0.9024390243902439, + "eval_runtime": 8.3308, + "eval_samples_per_second": 433.094, + "eval_steps_per_second": 3.481, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00030652855080379074, + "loss": 0.7463, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00030186465691861336, + "loss": 0.7372, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8478381374722838, + "eval_f1": 0.8635590930258699, + "eval_loss": 0.9075872302055359, + "eval_precision": 0.8971633502490409, + "eval_recall": 0.8478381374722838, + "eval_runtime": 7.8836, + "eval_samples_per_second": 457.661, + "eval_steps_per_second": 3.679, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0002963644804380847, + "loss": 0.7325, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.904379157427938, + "eval_f1": 0.9011642461264477, + "eval_loss": 0.8325739502906799, + "eval_precision": 0.9022742364410467, + "eval_recall": 0.904379157427938, + "eval_runtime": 8.031, + "eval_samples_per_second": 449.256, + "eval_steps_per_second": 3.611, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0002900600578278724, + "loss": 0.7354, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00028298810998579155, + "loss": 0.7332, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9031808248773262, + "eval_loss": 0.8125725388526917, + "eval_precision": 0.9003670553899056, + "eval_recall": 0.9088137472283814, + "eval_runtime": 7.2749, + "eval_samples_per_second": 495.952, + "eval_steps_per_second": 3.986, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0002751898283563437, + "loss": 0.7273, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.9053172320526279, + "eval_loss": 0.8211202621459961, + "eval_precision": 0.9063961058036959, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.0632, + "eval_samples_per_second": 447.463, + "eval_steps_per_second": 3.597, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0002667106350059975, + "loss": 0.7327, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00025759991805668315, + "loss": 0.719, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9054878048780488, + "eval_f1": 0.9035208066119936, + "eval_loss": 0.8288699388504028, + "eval_precision": 0.9032464189243595, + "eval_recall": 0.9054878048780488, + "eval_runtime": 7.9094, + "eval_samples_per_second": 456.164, + "eval_steps_per_second": 3.667, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0002479107440185005, + "loss": 0.7124, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0002376995486971952, + "loss": 0.7145, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9004988913525499, + "eval_f1": 0.8974454419066261, + "eval_loss": 0.8292616009712219, + "eval_precision": 0.8973082093928275, + "eval_recall": 0.9004988913525499, + "eval_runtime": 7.9372, + "eval_samples_per_second": 454.569, + "eval_steps_per_second": 3.654, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0002270258084767513, + "loss": 0.7118, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.902497626684123, + "eval_loss": 0.8220421671867371, + "eval_precision": 0.9044010330245825, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.9281, + "eval_samples_per_second": 455.09, + "eval_steps_per_second": 3.658, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00021595169389175602, + "loss": 0.7073, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00020454170750734658, + "loss": 0.7069, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9056654724686274, + "eval_loss": 0.8235394358634949, + "eval_precision": 0.9042915144530349, + "eval_recall": 0.9088137472283814, + "eval_runtime": 7.8365, + "eval_samples_per_second": 460.411, + "eval_steps_per_second": 3.701, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0001928623082159542, + "loss": 0.7063, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.0001809815241391751, + "loss": 0.7079, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9069201728077497, + "eval_loss": 0.8186624050140381, + "eval_precision": 0.903244901098, + "eval_recall": 0.9110310421286031, + "eval_runtime": 7.885, + "eval_samples_per_second": 457.577, + "eval_steps_per_second": 3.678, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0001689685563894695, + "loss": 0.7017, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.905210643015521, + "eval_f1": 0.9037345141494774, + "eval_loss": 0.8171346187591553, + "eval_precision": 0.9027453278381381, + "eval_recall": 0.905210643015521, + "eval_runtime": 7.8218, + "eval_samples_per_second": 461.275, + "eval_steps_per_second": 3.708, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00015689337599962926, + "loss": 0.7036, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00014482631636774794, + "loss": 0.6969, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.905217500677941, + "eval_loss": 0.8182144165039062, + "eval_precision": 0.903239742582158, + "eval_recall": 0.9138026607538803, + "eval_runtime": 8.0493, + "eval_samples_per_second": 448.235, + "eval_steps_per_second": 3.603, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00013283766359154891, + "loss": 0.6974, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00012099724707822041, + "loss": 0.6973, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9096452328159645, + "eval_f1": 0.9043774216686182, + "eval_loss": 0.8198089003562927, + "eval_precision": 0.9019819697367276, + "eval_recall": 0.9096452328159645, + "eval_runtime": 8.473, + "eval_samples_per_second": 425.825, + "eval_steps_per_second": 3.423, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.00010937403281430179, + "loss": 0.694, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.9063137850762539, + "eval_loss": 0.8276586532592773, + "eval_precision": 0.9062591531950064, + "eval_recall": 0.9065964523281597, + "eval_runtime": 8.0348, + "eval_samples_per_second": 449.047, + "eval_steps_per_second": 3.609, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 9.803572166467102e-05, + "loss": 0.695, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 8.704835504039208e-05, + "loss": 0.693, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9139430890329873, + "eval_loss": 0.8102039694786072, + "eval_precision": 0.9122430119641018, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.9092, + "eval_samples_per_second": 456.175, + "eval_steps_per_second": 3.667, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 7.647593023225772e-05, + "loss": 0.6913, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.912676448858518, + "eval_loss": 0.81104975938797, + "eval_precision": 0.9103593970093129, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.094, + "eval_samples_per_second": 445.764, + "eval_steps_per_second": 3.583, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 6.638002765056734e-05, + "loss": 0.69, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 5.681945214232673e-05, + "loss": 0.6906, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9138439115465644, + "eval_loss": 0.8099184632301331, + "eval_precision": 0.9100073126950153, + "eval_recall": 0.9190687361419069, + "eval_runtime": 7.7592, + "eval_samples_per_second": 464.996, + "eval_steps_per_second": 3.737, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 4.784989047506355e-05, + "loss": 0.686, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 3.952358698228685e-05, + "loss": 0.6881, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.912189288541431, + "eval_loss": 0.8164031505584717, + "eval_precision": 0.9108991477989876, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.6231, + "eval_samples_per_second": 473.3, + "eval_steps_per_second": 3.804, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 3.188903925983368e-05, + "loss": 0.6884, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9126317679479089, + "eval_loss": 0.8096389174461365, + "eval_precision": 0.9100287969011175, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.8599, + "eval_samples_per_second": 459.039, + "eval_steps_per_second": 3.69, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 2.499071568555766e-05, + "loss": 0.6851, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.88687964077021e-05, + "loss": 0.6868, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9167079498555535, + "eval_loss": 0.8074877858161926, + "eval_precision": 0.9151315411456494, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.049, + "eval_samples_per_second": 448.256, + "eval_steps_per_second": 3.603, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.3558939310602103e-05, + "loss": 0.6869, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 9.092072320878426e-06, + "loss": 0.6851, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.913368621625012, + "eval_loss": 0.8071135878562927, + "eval_precision": 0.9104082551170866, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.0697, + "eval_samples_per_second": 447.105, + "eval_steps_per_second": 3.594, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 5.494213263861758e-06, + "loss": 0.6845, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9143105338959607, + "eval_loss": 0.8059723973274231, + "eval_precision": 0.9109660514202674, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.0085, + "eval_samples_per_second": 450.519, + "eval_steps_per_second": 3.621, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 2.7863183195156486e-06, + "loss": 0.6852, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 9.841599605458681e-07, + "loss": 0.6866, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9254434589800443, + "eval_f1": 0.9218744662805249, + "eval_loss": 0.797498345375061, + "eval_precision": 0.9203264793734605, + "eval_recall": 0.9254434589800443, + "eval_runtime": 7.9877, + "eval_samples_per_second": 451.696, + "eval_steps_per_second": 3.631, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 9.823508366097094e-08, + "loss": 0.6823, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9212860310421286, + "eval_f1": 0.9169865033821633, + "eval_loss": 0.807928204536438, + "eval_precision": 0.9138215177254612, + "eval_recall": 0.9212860310421286, + "eval_runtime": 7.9982, + "eval_samples_per_second": 451.102, + "eval_steps_per_second": 3.626, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.22931029995038135, + "learning_rate": 0.0003165751745670321, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-acof4i3t/checkpoint-630/training_args.bin b/run-acof4i3t/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e9bf01e04021555dd59e3e700d7e5983219e23cc --- /dev/null +++ b/run-acof4i3t/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf2ff9b5cfeac1c1bc5b6936052ec79bc0e0efe4eb5bd49623fb30f1a9c81883 +size 4792 diff --git a/run-ajqikil9/checkpoint-531/model.safetensors b/run-ajqikil9/checkpoint-531/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..366911b782ca7c2da3f95c9ae2896bd8d6f486f9 --- /dev/null +++ b/run-ajqikil9/checkpoint-531/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ac5ea841bb3ba10928e8a2280ef4c1a5d8b99c4e497b1b5041e7f9833552fb +size 198025308 diff --git a/run-ajqikil9/checkpoint-531/optimizer.pt b/run-ajqikil9/checkpoint-531/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..37a6190bbb251b4a3773db10ec2580f92b5df192 --- /dev/null +++ b/run-ajqikil9/checkpoint-531/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04e637b6717c9511b41e25e82608a9a43fbeba4872d8e2fec19a642ed51b06ca +size 395900602 diff --git a/run-ajqikil9/checkpoint-531/rng_state.pth b/run-ajqikil9/checkpoint-531/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9f50e97f6b8f8d895fe76d5cec6c2f434c3883a4 --- /dev/null +++ b/run-ajqikil9/checkpoint-531/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7ebc248f8c573102043dc64b4a3df9b7043de65474577f599703b3d52b79074 +size 14244 diff --git a/run-ajqikil9/checkpoint-531/scheduler.pt b/run-ajqikil9/checkpoint-531/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ceca923f4fa50daf637f39b1278160915460d67 --- /dev/null +++ b/run-ajqikil9/checkpoint-531/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd6085c3daa21dd28f9d8a7e4bcfdc49e8caf307fd853ca1d0ae49b013d40fc2 +size 1064 diff --git a/run-ajqikil9/checkpoint-531/trainer_state.json b/run-ajqikil9/checkpoint-531/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8c8e73edbd8c799a21fd45b665a9c91b15329d23 --- /dev/null +++ b/run-ajqikil9/checkpoint-531/trainer_state.json @@ -0,0 +1,568 @@ +{ + "best_metric": 0.9127336610954928, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-ajqikil9/checkpoint-531", + "epoch": 24.988235294117647, + "eval_steps": 500, + "global_step": 531, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 0.00010880676467660402, + "loss": 1.2623, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8783259423503326, + "eval_f1": 0.8514785750121028, + "eval_loss": 0.8922330141067505, + "eval_precision": 0.871381267570629, + "eval_recall": 0.8783259423503326, + "eval_runtime": 8.1525, + "eval_samples_per_second": 442.564, + "eval_steps_per_second": 3.557, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00021761352935320804, + "loss": 0.9208, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.000326420294029812, + "loss": 0.8158, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.891629711751663, + "eval_f1": 0.8752975660491312, + "eval_loss": 0.8437784314155579, + "eval_precision": 0.8772241309564259, + "eval_recall": 0.891629711751663, + "eval_runtime": 7.9185, + "eval_samples_per_second": 455.643, + "eval_steps_per_second": 3.662, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.0004352270587064161, + "loss": 0.7988, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.2738359201773836, + "eval_f1": 0.3113742209077723, + "eval_loss": 2.063048839569092, + "eval_precision": 0.8516852602800928, + "eval_recall": 0.2738359201773836, + "eval_runtime": 7.6703, + "eval_samples_per_second": 470.387, + "eval_steps_per_second": 3.781, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.00054403382338302, + "loss": 0.7897, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.000652840588059624, + "loss": 0.7867, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8392461197339246, + "eval_f1": 0.7758593758001663, + "eval_loss": 0.9726565480232239, + "eval_precision": 0.844855508196414, + "eval_recall": 0.8392461197339246, + "eval_runtime": 8.004, + "eval_samples_per_second": 450.777, + "eval_steps_per_second": 3.623, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0007616473527362281, + "loss": 0.8009, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0007945712814783981, + "loss": 0.7939, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.6424611973392461, + "eval_f1": 0.6944809045328648, + "eval_loss": 1.1976085901260376, + "eval_precision": 0.82071758708475, + "eval_recall": 0.6424611973392461, + "eval_runtime": 7.6748, + "eval_samples_per_second": 470.11, + "eval_steps_per_second": 3.779, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0007918134474455596, + "loss": 0.7938, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8686252771618626, + "eval_f1": 0.8447245445831741, + "eval_loss": 0.8889691829681396, + "eval_precision": 0.8579263494985153, + "eval_recall": 0.8686252771618626, + "eval_runtime": 7.3719, + "eval_samples_per_second": 489.428, + "eval_steps_per_second": 3.934, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0007867592534238713, + "loss": 0.7913, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0007794381381984028, + "loss": 0.7838, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8664079822616408, + "eval_f1": 0.8516533082811049, + "eval_loss": 0.8968164920806885, + "eval_precision": 0.8730293116906714, + "eval_recall": 0.8664079822616408, + "eval_runtime": 8.3056, + "eval_samples_per_second": 434.407, + "eval_steps_per_second": 3.492, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0007698927445202365, + "loss": 0.779, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0007581786707284325, + "loss": 0.7773, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8555986696230599, + "eval_f1": 0.8490315434534891, + "eval_loss": 0.9065197706222534, + "eval_precision": 0.8805924254327685, + "eval_recall": 0.8555986696230599, + "eval_runtime": 7.7813, + "eval_samples_per_second": 463.675, + "eval_steps_per_second": 3.727, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0007443641469105503, + "loss": 0.7715, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.7139689578713969, + "eval_f1": 0.7631231323650722, + "eval_loss": 1.084621548652649, + "eval_precision": 0.8668589059540538, + "eval_recall": 0.7139689578713969, + "eval_runtime": 7.8487, + "eval_samples_per_second": 459.696, + "eval_steps_per_second": 3.695, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0007285296374879724, + "loss": 0.7723, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00071076737254082, + "loss": 0.7775, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8575388026607539, + "eval_f1": 0.8138028869796703, + "eval_loss": 0.9261995553970337, + "eval_precision": 0.8536375809372175, + "eval_recall": 0.8575388026607539, + "eval_runtime": 7.8921, + "eval_samples_per_second": 457.164, + "eval_steps_per_second": 3.675, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0006911808106023191, + "loss": 0.7663, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8367516629711752, + "eval_f1": 0.8505960595224487, + "eval_loss": 0.9176233410835266, + "eval_precision": 0.8837524886799399, + "eval_recall": 0.8367516629711752, + "eval_runtime": 8.0294, + "eval_samples_per_second": 449.351, + "eval_steps_per_second": 3.612, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0006698840360516365, + "loss": 0.7791, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0006470010946151487, + "loss": 0.7599, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8902439024390244, + "eval_f1": 0.8847033732031605, + "eval_loss": 0.843707799911499, + "eval_precision": 0.8846158701350423, + "eval_recall": 0.8902439024390244, + "eval_runtime": 7.575, + "eval_samples_per_second": 476.305, + "eval_steps_per_second": 3.828, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0006226652708465968, + "loss": 0.7499, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0005970183117945374, + "loss": 0.751, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.88470066518847, + "eval_f1": 0.8688061048947577, + "eval_loss": 0.8631225228309631, + "eval_precision": 0.8702897020626073, + "eval_recall": 0.88470066518847, + "eval_runtime": 7.5594, + "eval_samples_per_second": 477.287, + "eval_steps_per_second": 3.836, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0005702096013789333, + "loss": 0.7459, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.886640798226164, + "eval_f1": 0.8669405850245407, + "eval_loss": 0.8703803420066833, + "eval_precision": 0.8756127804251161, + "eval_recall": 0.886640798226164, + "eval_runtime": 8.054, + "eval_samples_per_second": 447.974, + "eval_steps_per_second": 3.601, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0005423952902858338, + "loss": 0.7451, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0005137373864481764, + "loss": 0.7457, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8968957871396895, + "eval_f1": 0.8926495600543535, + "eval_loss": 0.8354615569114685, + "eval_precision": 0.8908736260612043, + "eval_recall": 0.8968957871396895, + "eval_runtime": 7.74, + "eval_samples_per_second": 466.149, + "eval_steps_per_second": 3.747, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00048440281141032455, + "loss": 0.7313, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00045456242807266037, + "loss": 0.7356, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8855321507760532, + "eval_f1": 0.8672172304165384, + "eval_loss": 0.857757568359375, + "eval_precision": 0.8756974987782665, + "eval_recall": 0.8855321507760532, + "eval_runtime": 7.6819, + "eval_samples_per_second": 469.674, + "eval_steps_per_second": 3.775, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00042439004547925537, + "loss": 0.7293, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.893569844789357, + "eval_f1": 0.8837352251199658, + "eval_loss": 0.8487399220466614, + "eval_precision": 0.8831957208873402, + "eval_recall": 0.893569844789357, + "eval_runtime": 8.3472, + "eval_samples_per_second": 432.242, + "eval_steps_per_second": 3.474, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0003940614064453607, + "loss": 0.7223, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.0003637531639214034, + "loss": 0.717, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.8960643015521065, + "eval_f1": 0.8822000903269557, + "eval_loss": 0.8478697538375854, + "eval_precision": 0.8847962627217263, + "eval_recall": 0.8960643015521065, + "eval_runtime": 7.8922, + "eval_samples_per_second": 457.158, + "eval_steps_per_second": 3.674, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00033364185205578815, + "loss": 0.7147, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00030390285794967543, + "loss": 0.7164, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.905210643015521, + "eval_f1": 0.9021341111395762, + "eval_loss": 0.8276687264442444, + "eval_precision": 0.9011456401377895, + "eval_recall": 0.905210643015521, + "eval_runtime": 7.5968, + "eval_samples_per_second": 474.939, + "eval_steps_per_second": 3.817, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.0002747094000928799, + "loss": 0.709, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.897450110864745, + "eval_f1": 0.8956001885664088, + "eval_loss": 0.8337739109992981, + "eval_precision": 0.8950342884262416, + "eval_recall": 0.897450110864745, + "eval_runtime": 8.0889, + "eval_samples_per_second": 446.042, + "eval_steps_per_second": 3.585, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00024623151943111647, + "loss": 0.7098, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.0002186350889412505, + "loss": 0.7066, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.908025430715101, + "eval_loss": 0.8176734447479248, + "eval_precision": 0.9064770745899577, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.9677, + "eval_samples_per_second": 452.829, + "eval_steps_per_second": 3.64, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.0001920808474834015, + "loss": 0.7058, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9079822616407982, + "eval_f1": 0.9031860581578536, + "eval_loss": 0.8213143944740295, + "eval_precision": 0.8990554293765749, + "eval_recall": 0.9079822616407982, + "eval_runtime": 7.4562, + "eval_samples_per_second": 483.895, + "eval_steps_per_second": 3.889, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00016672346355735442, + "loss": 0.7043, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 0.00014271063441654186, + "loss": 0.6982, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.91144684828406, + "eval_loss": 0.8110450506210327, + "eval_precision": 0.909867070598627, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.9848, + "eval_samples_per_second": 451.858, + "eval_steps_per_second": 3.632, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 0.00012018222578692274, + "loss": 0.6961, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 9.926945720157303e-05, + "loss": 0.6964, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.9069916792101534, + "eval_loss": 0.8217727541923523, + "eval_precision": 0.9056571838174233, + "eval_recall": 0.9093680709534369, + "eval_runtime": 8.2233, + "eval_samples_per_second": 438.751, + "eval_steps_per_second": 3.527, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 8.009413769610691e-05, + "loss": 0.6969, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9127336610954928, + "eval_loss": 0.80921870470047, + "eval_precision": 0.9101458893377908, + "eval_recall": 0.9174057649667405, + "eval_runtime": 7.9462, + "eval_samples_per_second": 454.051, + "eval_steps_per_second": 3.65, + "step": 531 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.10354081551652689, + "learning_rate": 0.0007951263572521062, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-ajqikil9/checkpoint-531/training_args.bin b/run-ajqikil9/checkpoint-531/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0c325f5d9d9655990b54043f10b0e7a822f7476d --- /dev/null +++ b/run-ajqikil9/checkpoint-531/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb29792d72550ff6f1051bcaf5a877458ae30fe9675cc11cb8e35851ac982c8d +size 4792 diff --git a/run-ajqikil9/checkpoint-630/model.safetensors b/run-ajqikil9/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a154e93737779f4d2289ea0e8f3c319e3efc965c --- /dev/null +++ b/run-ajqikil9/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42a9356152a0afe9d53d656286a3b0c9ea9a4c6f6d020ffb91509a54d5af8f02 +size 198025308 diff --git a/run-ajqikil9/checkpoint-630/optimizer.pt b/run-ajqikil9/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a1c4a88518dee2df1ee64fcf343e535f9db30cc --- /dev/null +++ b/run-ajqikil9/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:592ffa975643764a6f0a06b03ddc1fea57a732305efbe6f9906f9d7229692890 +size 395900602 diff --git a/run-ajqikil9/checkpoint-630/rng_state.pth b/run-ajqikil9/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-ajqikil9/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-ajqikil9/checkpoint-630/scheduler.pt b/run-ajqikil9/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3195cef83c0bd2e5bf384cbc1371110a93b299ae --- /dev/null +++ b/run-ajqikil9/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fad99443d2740abf256b27dc60f5e72a43676f173aad7e3362e2ef46cc36d94 +size 1064 diff --git a/run-ajqikil9/checkpoint-630/trainer_state.json b/run-ajqikil9/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5189834a4854b8cdc65ab3dd05301787097b6b26 --- /dev/null +++ b/run-ajqikil9/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9127336610954928, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-ajqikil9/checkpoint-531", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 0.00010880676467660402, + "loss": 1.2623, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8783259423503326, + "eval_f1": 0.8514785750121028, + "eval_loss": 0.8922330141067505, + "eval_precision": 0.871381267570629, + "eval_recall": 0.8783259423503326, + "eval_runtime": 8.1525, + "eval_samples_per_second": 442.564, + "eval_steps_per_second": 3.557, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00021761352935320804, + "loss": 0.9208, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.000326420294029812, + "loss": 0.8158, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.891629711751663, + "eval_f1": 0.8752975660491312, + "eval_loss": 0.8437784314155579, + "eval_precision": 0.8772241309564259, + "eval_recall": 0.891629711751663, + "eval_runtime": 7.9185, + "eval_samples_per_second": 455.643, + "eval_steps_per_second": 3.662, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.0004352270587064161, + "loss": 0.7988, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.2738359201773836, + "eval_f1": 0.3113742209077723, + "eval_loss": 2.063048839569092, + "eval_precision": 0.8516852602800928, + "eval_recall": 0.2738359201773836, + "eval_runtime": 7.6703, + "eval_samples_per_second": 470.387, + "eval_steps_per_second": 3.781, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.00054403382338302, + "loss": 0.7897, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.000652840588059624, + "loss": 0.7867, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8392461197339246, + "eval_f1": 0.7758593758001663, + "eval_loss": 0.9726565480232239, + "eval_precision": 0.844855508196414, + "eval_recall": 0.8392461197339246, + "eval_runtime": 8.004, + "eval_samples_per_second": 450.777, + "eval_steps_per_second": 3.623, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0007616473527362281, + "loss": 0.8009, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0007945712814783981, + "loss": 0.7939, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.6424611973392461, + "eval_f1": 0.6944809045328648, + "eval_loss": 1.1976085901260376, + "eval_precision": 0.82071758708475, + "eval_recall": 0.6424611973392461, + "eval_runtime": 7.6748, + "eval_samples_per_second": 470.11, + "eval_steps_per_second": 3.779, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0007918134474455596, + "loss": 0.7938, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8686252771618626, + "eval_f1": 0.8447245445831741, + "eval_loss": 0.8889691829681396, + "eval_precision": 0.8579263494985153, + "eval_recall": 0.8686252771618626, + "eval_runtime": 7.3719, + "eval_samples_per_second": 489.428, + "eval_steps_per_second": 3.934, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0007867592534238713, + "loss": 0.7913, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0007794381381984028, + "loss": 0.7838, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8664079822616408, + "eval_f1": 0.8516533082811049, + "eval_loss": 0.8968164920806885, + "eval_precision": 0.8730293116906714, + "eval_recall": 0.8664079822616408, + "eval_runtime": 8.3056, + "eval_samples_per_second": 434.407, + "eval_steps_per_second": 3.492, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0007698927445202365, + "loss": 0.779, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0007581786707284325, + "loss": 0.7773, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8555986696230599, + "eval_f1": 0.8490315434534891, + "eval_loss": 0.9065197706222534, + "eval_precision": 0.8805924254327685, + "eval_recall": 0.8555986696230599, + "eval_runtime": 7.7813, + "eval_samples_per_second": 463.675, + "eval_steps_per_second": 3.727, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0007443641469105503, + "loss": 0.7715, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.7139689578713969, + "eval_f1": 0.7631231323650722, + "eval_loss": 1.084621548652649, + "eval_precision": 0.8668589059540538, + "eval_recall": 0.7139689578713969, + "eval_runtime": 7.8487, + "eval_samples_per_second": 459.696, + "eval_steps_per_second": 3.695, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0007285296374879724, + "loss": 0.7723, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00071076737254082, + "loss": 0.7775, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8575388026607539, + "eval_f1": 0.8138028869796703, + "eval_loss": 0.9261995553970337, + "eval_precision": 0.8536375809372175, + "eval_recall": 0.8575388026607539, + "eval_runtime": 7.8921, + "eval_samples_per_second": 457.164, + "eval_steps_per_second": 3.675, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0006911808106023191, + "loss": 0.7663, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8367516629711752, + "eval_f1": 0.8505960595224487, + "eval_loss": 0.9176233410835266, + "eval_precision": 0.8837524886799399, + "eval_recall": 0.8367516629711752, + "eval_runtime": 8.0294, + "eval_samples_per_second": 449.351, + "eval_steps_per_second": 3.612, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0006698840360516365, + "loss": 0.7791, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0006470010946151487, + "loss": 0.7599, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8902439024390244, + "eval_f1": 0.8847033732031605, + "eval_loss": 0.843707799911499, + "eval_precision": 0.8846158701350423, + "eval_recall": 0.8902439024390244, + "eval_runtime": 7.575, + "eval_samples_per_second": 476.305, + "eval_steps_per_second": 3.828, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0006226652708465968, + "loss": 0.7499, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0005970183117945374, + "loss": 0.751, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.88470066518847, + "eval_f1": 0.8688061048947577, + "eval_loss": 0.8631225228309631, + "eval_precision": 0.8702897020626073, + "eval_recall": 0.88470066518847, + "eval_runtime": 7.5594, + "eval_samples_per_second": 477.287, + "eval_steps_per_second": 3.836, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0005702096013789333, + "loss": 0.7459, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.886640798226164, + "eval_f1": 0.8669405850245407, + "eval_loss": 0.8703803420066833, + "eval_precision": 0.8756127804251161, + "eval_recall": 0.886640798226164, + "eval_runtime": 8.054, + "eval_samples_per_second": 447.974, + "eval_steps_per_second": 3.601, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0005423952902858338, + "loss": 0.7451, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0005137373864481764, + "loss": 0.7457, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8968957871396895, + "eval_f1": 0.8926495600543535, + "eval_loss": 0.8354615569114685, + "eval_precision": 0.8908736260612043, + "eval_recall": 0.8968957871396895, + "eval_runtime": 7.74, + "eval_samples_per_second": 466.149, + "eval_steps_per_second": 3.747, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00048440281141032455, + "loss": 0.7313, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00045456242807266037, + "loss": 0.7356, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8855321507760532, + "eval_f1": 0.8672172304165384, + "eval_loss": 0.857757568359375, + "eval_precision": 0.8756974987782665, + "eval_recall": 0.8855321507760532, + "eval_runtime": 7.6819, + "eval_samples_per_second": 469.674, + "eval_steps_per_second": 3.775, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00042439004547925537, + "loss": 0.7293, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.893569844789357, + "eval_f1": 0.8837352251199658, + "eval_loss": 0.8487399220466614, + "eval_precision": 0.8831957208873402, + "eval_recall": 0.893569844789357, + "eval_runtime": 8.3472, + "eval_samples_per_second": 432.242, + "eval_steps_per_second": 3.474, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0003940614064453607, + "loss": 0.7223, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.0003637531639214034, + "loss": 0.717, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.8960643015521065, + "eval_f1": 0.8822000903269557, + "eval_loss": 0.8478697538375854, + "eval_precision": 0.8847962627217263, + "eval_recall": 0.8960643015521065, + "eval_runtime": 7.8922, + "eval_samples_per_second": 457.158, + "eval_steps_per_second": 3.674, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00033364185205578815, + "loss": 0.7147, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00030390285794967543, + "loss": 0.7164, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.905210643015521, + "eval_f1": 0.9021341111395762, + "eval_loss": 0.8276687264442444, + "eval_precision": 0.9011456401377895, + "eval_recall": 0.905210643015521, + "eval_runtime": 7.5968, + "eval_samples_per_second": 474.939, + "eval_steps_per_second": 3.817, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.0002747094000928799, + "loss": 0.709, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.897450110864745, + "eval_f1": 0.8956001885664088, + "eval_loss": 0.8337739109992981, + "eval_precision": 0.8950342884262416, + "eval_recall": 0.897450110864745, + "eval_runtime": 8.0889, + "eval_samples_per_second": 446.042, + "eval_steps_per_second": 3.585, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00024623151943111647, + "loss": 0.7098, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.0002186350889412505, + "loss": 0.7066, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.908025430715101, + "eval_loss": 0.8176734447479248, + "eval_precision": 0.9064770745899577, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.9677, + "eval_samples_per_second": 452.829, + "eval_steps_per_second": 3.64, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.0001920808474834015, + "loss": 0.7058, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9079822616407982, + "eval_f1": 0.9031860581578536, + "eval_loss": 0.8213143944740295, + "eval_precision": 0.8990554293765749, + "eval_recall": 0.9079822616407982, + "eval_runtime": 7.4562, + "eval_samples_per_second": 483.895, + "eval_steps_per_second": 3.889, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00016672346355735442, + "loss": 0.7043, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 0.00014271063441654186, + "loss": 0.6982, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.91144684828406, + "eval_loss": 0.8110450506210327, + "eval_precision": 0.909867070598627, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.9848, + "eval_samples_per_second": 451.858, + "eval_steps_per_second": 3.632, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 0.00012018222578692274, + "loss": 0.6961, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 9.926945720157303e-05, + "loss": 0.6964, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.9069916792101534, + "eval_loss": 0.8217727541923523, + "eval_precision": 0.9056571838174233, + "eval_recall": 0.9093680709534369, + "eval_runtime": 8.2233, + "eval_samples_per_second": 438.751, + "eval_steps_per_second": 3.527, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 8.009413769610691e-05, + "loss": 0.6969, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9127336610954928, + "eval_loss": 0.80921870470047, + "eval_precision": 0.9101458893377908, + "eval_recall": 0.9174057649667405, + "eval_runtime": 7.9462, + "eval_samples_per_second": 454.051, + "eval_steps_per_second": 3.65, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 6.276795631671702e-05, + "loss": 0.6892, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 4.739183157335992e-05, + "loss": 0.6912, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9088234917345726, + "eval_loss": 0.8173182606697083, + "eval_precision": 0.9069568257179633, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.8988, + "eval_samples_per_second": 456.776, + "eval_steps_per_second": 3.671, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 3.405532362727525e-05, + "loss": 0.6904, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 2.2836112636627523e-05, + "loss": 0.6892, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.9028271025989751, + "eval_loss": 0.8220503330230713, + "eval_precision": 0.8998984867970935, + "eval_recall": 0.9065964523281597, + "eval_runtime": 7.762, + "eval_samples_per_second": 464.829, + "eval_steps_per_second": 3.736, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.3799546298712037e-05, + "loss": 0.6905, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9126108489136591, + "eval_loss": 0.8076441287994385, + "eval_precision": 0.9110663290214529, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.6691, + "eval_samples_per_second": 470.458, + "eval_steps_per_second": 3.781, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 6.9982592241204945e-06, + "loss": 0.6929, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 2.4718663598699787e-06, + "loss": 0.6892, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9122025826592276, + "eval_loss": 0.8127114176750183, + "eval_precision": 0.9105624751380598, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.9434, + "eval_samples_per_second": 454.214, + "eval_steps_per_second": 3.651, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 2.46732247190674e-07, + "loss": 0.6875, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9102824806546069, + "eval_loss": 0.8187455534934998, + "eval_precision": 0.9083024740491035, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.6669, + "eval_samples_per_second": 470.594, + "eval_steps_per_second": 3.782, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.10354081551652689, + "learning_rate": 0.0007951263572521062, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-ajqikil9/checkpoint-630/training_args.bin b/run-ajqikil9/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0c325f5d9d9655990b54043f10b0e7a822f7476d --- /dev/null +++ b/run-ajqikil9/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb29792d72550ff6f1051bcaf5a877458ae30fe9675cc11cb8e35851ac982c8d +size 4792 diff --git a/run-al5n9gw8/checkpoint-616/model.safetensors b/run-al5n9gw8/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b88dabfd8cb713e1e8bcc72589e38bc837cba8b --- /dev/null +++ b/run-al5n9gw8/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f25169c845f9e3120e3f54f1fc53d414908a74dd4b0a626ae513b904c61dbc65 +size 198025308 diff --git a/run-al5n9gw8/checkpoint-616/optimizer.pt b/run-al5n9gw8/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..06ef05417c30dfdafc50d1f8cf755f9642aaaeee --- /dev/null +++ b/run-al5n9gw8/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84f77f76cf81167141d18ce62724f96cca441d365f5a92fcb23e5335e49cecbc +size 395900602 diff --git a/run-al5n9gw8/checkpoint-616/rng_state.pth b/run-al5n9gw8/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-al5n9gw8/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-al5n9gw8/checkpoint-616/scheduler.pt b/run-al5n9gw8/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..32d7c1a62a887af3103d43be9adda0b88ea9daac --- /dev/null +++ b/run-al5n9gw8/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d359a060b570e0a14f96c2cfcc25cb1fe92f17c0af620c4a6700c4c2866fd1a +size 1064 diff --git a/run-al5n9gw8/checkpoint-616/trainer_state.json b/run-al5n9gw8/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6b81210eaeef879cd3e27b3d6938676eea1fa6d1 --- /dev/null +++ b/run-al5n9gw8/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9180823883093728, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-al5n9gw8/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.127975290537408e-05, + "loss": 1.4486, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9894014000892639, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.9885, + "eval_samples_per_second": 451.647, + "eval_steps_per_second": 3.63, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 4.255950581074816e-05, + "loss": 1.067, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 6.383925871612224e-05, + "loss": 0.9098, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.883869179600887, + "eval_f1": 0.8712563571511649, + "eval_loss": 0.9086562991142273, + "eval_precision": 0.8794857359604061, + "eval_recall": 0.883869179600887, + "eval_runtime": 7.8659, + "eval_samples_per_second": 458.691, + "eval_steps_per_second": 3.687, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 8.511901162149632e-05, + "loss": 0.8439, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9048518985618585, + "eval_loss": 0.8177870512008667, + "eval_precision": 0.9005270796812408, + "eval_recall": 0.9099223946784922, + "eval_runtime": 7.5734, + "eval_samples_per_second": 476.406, + "eval_steps_per_second": 3.829, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001063987645268704, + "loss": 0.8096, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00012767851743224448, + "loss": 0.7879, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.8975921543964994, + "eval_loss": 0.8090024590492249, + "eval_precision": 0.8976192874170199, + "eval_recall": 0.9090909090909091, + "eval_runtime": 7.4475, + "eval_samples_per_second": 484.46, + "eval_steps_per_second": 3.894, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00014895827033761853, + "loss": 0.7892, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00015539732833543595, + "loss": 0.7661, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.9059228720165633, + "eval_loss": 0.8293875455856323, + "eval_precision": 0.9095498470137525, + "eval_recall": 0.907150776053215, + "eval_runtime": 8.1538, + "eval_samples_per_second": 442.495, + "eval_steps_per_second": 3.557, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00015485796824190443, + "loss": 0.7706, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9032705099778271, + "eval_f1": 0.9006088853331862, + "eval_loss": 0.8217763900756836, + "eval_precision": 0.9084525448400171, + "eval_recall": 0.9032705099778271, + "eval_runtime": 7.7328, + "eval_samples_per_second": 466.582, + "eval_steps_per_second": 3.75, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00015386950028922693, + "loss": 0.7586, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00015243768193259454, + "loss": 0.7509, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9106200672087575, + "eval_loss": 0.7984517812728882, + "eval_precision": 0.9072051690629884, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.5816, + "eval_samples_per_second": 475.889, + "eval_steps_per_second": 3.825, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00015057085297706384, + "loss": 0.7483, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001482798870013196, + "loss": 0.7385, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.908451271578806, + "eval_loss": 0.8063976764678955, + "eval_precision": 0.9046163850802094, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.8645, + "eval_samples_per_second": 458.77, + "eval_steps_per_second": 3.687, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00014557812802315612, + "loss": 0.7316, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9107906423012013, + "eval_loss": 0.8036113381385803, + "eval_precision": 0.9093959807082621, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.3281, + "eval_samples_per_second": 492.353, + "eval_steps_per_second": 3.957, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00014248131277557686, + "loss": 0.7325, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00013900747904622542, + "loss": 0.7292, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9098347554606826, + "eval_loss": 0.8047086000442505, + "eval_precision": 0.9082807107554147, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.8347, + "eval_samples_per_second": 460.517, + "eval_steps_per_second": 3.701, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00013517686061403596, + "loss": 0.7242, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.9009936788628978, + "eval_loss": 0.8299344182014465, + "eval_precision": 0.9053400595098855, + "eval_recall": 0.8991130820399114, + "eval_runtime": 7.5525, + "eval_samples_per_second": 477.723, + "eval_steps_per_second": 3.84, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00013101176939505745, + "loss": 0.7277, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00012653646548390908, + "loss": 0.7183, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8921840354767184, + "eval_f1": 0.897555532617244, + "eval_loss": 0.8419504761695862, + "eval_precision": 0.9090590503287885, + "eval_recall": 0.8921840354767184, + "eval_runtime": 7.7672, + "eval_samples_per_second": 464.516, + "eval_steps_per_second": 3.734, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00012177701584782531, + "loss": 0.7099, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00011676114249634587, + "loss": 0.7116, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9107067425363585, + "eval_loss": 0.8166165351867676, + "eval_precision": 0.9087198802649348, + "eval_recall": 0.9129711751662971, + "eval_runtime": 7.8987, + "eval_samples_per_second": 456.785, + "eval_steps_per_second": 3.671, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00011151806101100464, + "loss": 0.7099, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9073858799413876, + "eval_loss": 0.8176850080490112, + "eval_precision": 0.9085251558389235, + "eval_recall": 0.9157427937915743, + "eval_runtime": 7.74, + "eval_samples_per_second": 466.148, + "eval_steps_per_second": 3.747, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00010607831037552205, + "loss": 0.7097, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0001004735750976754, + "loss": 0.7085, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9111165323324527, + "eval_loss": 0.8071944117546082, + "eval_precision": 0.9102880478941114, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.4685, + "eval_samples_per_second": 483.098, + "eval_steps_per_second": 3.883, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 9.473650065892162e-05, + "loss": 0.7036, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 8.890050336670846e-05, + "loss": 0.7093, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9106723021181227, + "eval_loss": 0.8048650622367859, + "eval_precision": 0.9074605565539456, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.9659, + "eval_samples_per_second": 452.928, + "eval_steps_per_second": 3.64, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 8.299957571701311e-05, + "loss": 0.7009, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9116935643344211, + "eval_loss": 0.8133228421211243, + "eval_precision": 0.9152209764172635, + "eval_recall": 0.9121396895787139, + "eval_runtime": 8.0285, + "eval_samples_per_second": 449.398, + "eval_steps_per_second": 3.612, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 7.706808840079907e-05, + "loss": 0.7005, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 7.114059010762868e-05, + "loss": 0.6983, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9146819661073555, + "eval_loss": 0.8046716451644897, + "eval_precision": 0.9138465871691727, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.9115, + "eval_samples_per_second": 456.045, + "eval_steps_per_second": 3.666, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 6.525160629250079e-05, + "loss": 0.6979, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 5.943543807802092e-05, + "loss": 0.6995, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9086649969054031, + "eval_loss": 0.811021625995636, + "eval_precision": 0.9057651956668531, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.773, + "eval_samples_per_second": 464.172, + "eval_steps_per_second": 3.731, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 5.37259624632236e-05, + "loss": 0.6941, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9122029502474489, + "eval_loss": 0.8090903162956238, + "eval_precision": 0.9108430667572165, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.8926, + "eval_samples_per_second": 457.14, + "eval_steps_per_second": 3.674, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.8156435002755325e-05, + "loss": 0.6966, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 4.2759296105738285e-05, + "loss": 0.6964, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9162349251535975, + "eval_loss": 0.8019397258758545, + "eval_precision": 0.91475155402232, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.7565, + "eval_samples_per_second": 465.156, + "eval_steps_per_second": 3.739, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.756598208255081e-05, + "loss": 0.6932, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9093802889296593, + "eval_loss": 0.8079545497894287, + "eval_precision": 0.9067255084774305, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.6609, + "eval_samples_per_second": 470.96, + "eval_steps_per_second": 3.785, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 3.26067420401069e-05, + "loss": 0.6936, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.7910461692150445e-05, + "loss": 0.6928, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9165336249267656, + "eval_loss": 0.7997718453407288, + "eval_precision": 0.9149540847557971, + "eval_recall": 0.9207317073170732, + "eval_runtime": 7.7916, + "eval_samples_per_second": 463.064, + "eval_steps_per_second": 3.722, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.350449511080356e-05, + "loss": 0.6905, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.94145053993532e-05, + "loss": 0.6893, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9136988915020483, + "eval_loss": 0.8028073906898499, + "eval_precision": 0.9119699330646884, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.2957, + "eval_samples_per_second": 434.923, + "eval_steps_per_second": 3.496, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.5664315214297007e-05, + "loss": 0.6917, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9104584021237628, + "eval_loss": 0.8037900924682617, + "eval_precision": 0.9075596845906686, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.9543, + "eval_samples_per_second": 453.591, + "eval_steps_per_second": 3.646, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.2275768007301627e-05, + "loss": 0.686, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 9.268600795287252e-06, + "loss": 0.6879, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9168749274378349, + "eval_loss": 0.8009039759635925, + "eval_precision": 0.9141391437595018, + "eval_recall": 0.9207317073170732, + "eval_runtime": 7.9654, + "eval_samples_per_second": 452.961, + "eval_steps_per_second": 3.641, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 6.660329199704529e-06, + "loss": 0.689, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 4.466145424607172e-06, + "loss": 0.6882, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9097351278852283, + "eval_loss": 0.8058528304100037, + "eval_precision": 0.9068820566264718, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.8269, + "eval_samples_per_second": 460.972, + "eval_steps_per_second": 3.705, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.698829767759866e-06, + "loss": 0.689, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9129586404079126, + "eval_loss": 0.8073987364768982, + "eval_precision": 0.9101675816658973, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.904, + "eval_samples_per_second": 456.475, + "eval_steps_per_second": 3.669, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.3686761801958123e-06, + "loss": 0.6906, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 4.834323078117404e-07, + "loss": 0.6904, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9180823883093728, + "eval_loss": 0.7987972497940063, + "eval_precision": 0.9157644622842541, + "eval_recall": 0.9210088691796009, + "eval_runtime": 7.6262, + "eval_samples_per_second": 473.105, + "eval_steps_per_second": 3.803, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.1005258860047984, + "learning_rate": 0.00015550588661619518, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-al5n9gw8/checkpoint-616/training_args.bin b/run-al5n9gw8/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..754ab77540cb341b3a02360eee4ae6cdba4d5f36 --- /dev/null +++ b/run-al5n9gw8/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d967ae94422bb13d311017101769fc2ff84863fedc3d4c2f3a883baa50c04d7f +size 4792 diff --git a/run-al5n9gw8/checkpoint-630/model.safetensors b/run-al5n9gw8/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..994ab7fef74d4770401d39719630e8d2931a6688 --- /dev/null +++ b/run-al5n9gw8/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bb680e33547876c5d63f4299d2f10cdcdafe8e4414a9a14774a95cc1e91fa97 +size 198025308 diff --git a/run-al5n9gw8/checkpoint-630/optimizer.pt b/run-al5n9gw8/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec836a49eb499d25eeccfad9e1e94b7b5511334e --- /dev/null +++ b/run-al5n9gw8/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54f675eb97b2666bd34ccd0f072f97dba2f9e626fe398d73304ec8df7a7cbf55 +size 395900602 diff --git a/run-al5n9gw8/checkpoint-630/rng_state.pth b/run-al5n9gw8/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-al5n9gw8/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-al5n9gw8/checkpoint-630/scheduler.pt b/run-al5n9gw8/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c359ea02f67578017fceb8630e559a0b2bedc2b2 --- /dev/null +++ b/run-al5n9gw8/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e72d52261f6e81a9e7aec10f2c226d6f9f05d3525d81918216a70ba9db5c1882 +size 1064 diff --git a/run-al5n9gw8/checkpoint-630/trainer_state.json b/run-al5n9gw8/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ef4b25d099197ac461f6c245f95b6a00a393fba4 --- /dev/null +++ b/run-al5n9gw8/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9180823883093728, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-al5n9gw8/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.127975290537408e-05, + "loss": 1.4486, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9894014000892639, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.9885, + "eval_samples_per_second": 451.647, + "eval_steps_per_second": 3.63, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 4.255950581074816e-05, + "loss": 1.067, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 6.383925871612224e-05, + "loss": 0.9098, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.883869179600887, + "eval_f1": 0.8712563571511649, + "eval_loss": 0.9086562991142273, + "eval_precision": 0.8794857359604061, + "eval_recall": 0.883869179600887, + "eval_runtime": 7.8659, + "eval_samples_per_second": 458.691, + "eval_steps_per_second": 3.687, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 8.511901162149632e-05, + "loss": 0.8439, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9048518985618585, + "eval_loss": 0.8177870512008667, + "eval_precision": 0.9005270796812408, + "eval_recall": 0.9099223946784922, + "eval_runtime": 7.5734, + "eval_samples_per_second": 476.406, + "eval_steps_per_second": 3.829, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001063987645268704, + "loss": 0.8096, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00012767851743224448, + "loss": 0.7879, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.8975921543964994, + "eval_loss": 0.8090024590492249, + "eval_precision": 0.8976192874170199, + "eval_recall": 0.9090909090909091, + "eval_runtime": 7.4475, + "eval_samples_per_second": 484.46, + "eval_steps_per_second": 3.894, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00014895827033761853, + "loss": 0.7892, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00015539732833543595, + "loss": 0.7661, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.9059228720165633, + "eval_loss": 0.8293875455856323, + "eval_precision": 0.9095498470137525, + "eval_recall": 0.907150776053215, + "eval_runtime": 8.1538, + "eval_samples_per_second": 442.495, + "eval_steps_per_second": 3.557, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00015485796824190443, + "loss": 0.7706, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9032705099778271, + "eval_f1": 0.9006088853331862, + "eval_loss": 0.8217763900756836, + "eval_precision": 0.9084525448400171, + "eval_recall": 0.9032705099778271, + "eval_runtime": 7.7328, + "eval_samples_per_second": 466.582, + "eval_steps_per_second": 3.75, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00015386950028922693, + "loss": 0.7586, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00015243768193259454, + "loss": 0.7509, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9106200672087575, + "eval_loss": 0.7984517812728882, + "eval_precision": 0.9072051690629884, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.5816, + "eval_samples_per_second": 475.889, + "eval_steps_per_second": 3.825, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00015057085297706384, + "loss": 0.7483, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001482798870013196, + "loss": 0.7385, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.908451271578806, + "eval_loss": 0.8063976764678955, + "eval_precision": 0.9046163850802094, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.8645, + "eval_samples_per_second": 458.77, + "eval_steps_per_second": 3.687, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00014557812802315612, + "loss": 0.7316, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9107906423012013, + "eval_loss": 0.8036113381385803, + "eval_precision": 0.9093959807082621, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.3281, + "eval_samples_per_second": 492.353, + "eval_steps_per_second": 3.957, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00014248131277557686, + "loss": 0.7325, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00013900747904622542, + "loss": 0.7292, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9098347554606826, + "eval_loss": 0.8047086000442505, + "eval_precision": 0.9082807107554147, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.8347, + "eval_samples_per_second": 460.517, + "eval_steps_per_second": 3.701, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00013517686061403596, + "loss": 0.7242, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.9009936788628978, + "eval_loss": 0.8299344182014465, + "eval_precision": 0.9053400595098855, + "eval_recall": 0.8991130820399114, + "eval_runtime": 7.5525, + "eval_samples_per_second": 477.723, + "eval_steps_per_second": 3.84, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00013101176939505745, + "loss": 0.7277, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00012653646548390908, + "loss": 0.7183, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8921840354767184, + "eval_f1": 0.897555532617244, + "eval_loss": 0.8419504761695862, + "eval_precision": 0.9090590503287885, + "eval_recall": 0.8921840354767184, + "eval_runtime": 7.7672, + "eval_samples_per_second": 464.516, + "eval_steps_per_second": 3.734, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00012177701584782531, + "loss": 0.7099, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00011676114249634587, + "loss": 0.7116, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9107067425363585, + "eval_loss": 0.8166165351867676, + "eval_precision": 0.9087198802649348, + "eval_recall": 0.9129711751662971, + "eval_runtime": 7.8987, + "eval_samples_per_second": 456.785, + "eval_steps_per_second": 3.671, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00011151806101100464, + "loss": 0.7099, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9073858799413876, + "eval_loss": 0.8176850080490112, + "eval_precision": 0.9085251558389235, + "eval_recall": 0.9157427937915743, + "eval_runtime": 7.74, + "eval_samples_per_second": 466.148, + "eval_steps_per_second": 3.747, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00010607831037552205, + "loss": 0.7097, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0001004735750976754, + "loss": 0.7085, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9111165323324527, + "eval_loss": 0.8071944117546082, + "eval_precision": 0.9102880478941114, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.4685, + "eval_samples_per_second": 483.098, + "eval_steps_per_second": 3.883, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 9.473650065892162e-05, + "loss": 0.7036, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 8.890050336670846e-05, + "loss": 0.7093, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9106723021181227, + "eval_loss": 0.8048650622367859, + "eval_precision": 0.9074605565539456, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.9659, + "eval_samples_per_second": 452.928, + "eval_steps_per_second": 3.64, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 8.299957571701311e-05, + "loss": 0.7009, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9116935643344211, + "eval_loss": 0.8133228421211243, + "eval_precision": 0.9152209764172635, + "eval_recall": 0.9121396895787139, + "eval_runtime": 8.0285, + "eval_samples_per_second": 449.398, + "eval_steps_per_second": 3.612, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 7.706808840079907e-05, + "loss": 0.7005, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 7.114059010762868e-05, + "loss": 0.6983, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9146819661073555, + "eval_loss": 0.8046716451644897, + "eval_precision": 0.9138465871691727, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.9115, + "eval_samples_per_second": 456.045, + "eval_steps_per_second": 3.666, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 6.525160629250079e-05, + "loss": 0.6979, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 5.943543807802092e-05, + "loss": 0.6995, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9086649969054031, + "eval_loss": 0.811021625995636, + "eval_precision": 0.9057651956668531, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.773, + "eval_samples_per_second": 464.172, + "eval_steps_per_second": 3.731, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 5.37259624632236e-05, + "loss": 0.6941, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9122029502474489, + "eval_loss": 0.8090903162956238, + "eval_precision": 0.9108430667572165, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.8926, + "eval_samples_per_second": 457.14, + "eval_steps_per_second": 3.674, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.8156435002755325e-05, + "loss": 0.6966, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 4.2759296105738285e-05, + "loss": 0.6964, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9162349251535975, + "eval_loss": 0.8019397258758545, + "eval_precision": 0.91475155402232, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.7565, + "eval_samples_per_second": 465.156, + "eval_steps_per_second": 3.739, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.756598208255081e-05, + "loss": 0.6932, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9093802889296593, + "eval_loss": 0.8079545497894287, + "eval_precision": 0.9067255084774305, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.6609, + "eval_samples_per_second": 470.96, + "eval_steps_per_second": 3.785, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 3.26067420401069e-05, + "loss": 0.6936, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.7910461692150445e-05, + "loss": 0.6928, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9165336249267656, + "eval_loss": 0.7997718453407288, + "eval_precision": 0.9149540847557971, + "eval_recall": 0.9207317073170732, + "eval_runtime": 7.7916, + "eval_samples_per_second": 463.064, + "eval_steps_per_second": 3.722, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.350449511080356e-05, + "loss": 0.6905, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.94145053993532e-05, + "loss": 0.6893, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9136988915020483, + "eval_loss": 0.8028073906898499, + "eval_precision": 0.9119699330646884, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.2957, + "eval_samples_per_second": 434.923, + "eval_steps_per_second": 3.496, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.5664315214297007e-05, + "loss": 0.6917, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9104584021237628, + "eval_loss": 0.8037900924682617, + "eval_precision": 0.9075596845906686, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.9543, + "eval_samples_per_second": 453.591, + "eval_steps_per_second": 3.646, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.2275768007301627e-05, + "loss": 0.686, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 9.268600795287252e-06, + "loss": 0.6879, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9168749274378349, + "eval_loss": 0.8009039759635925, + "eval_precision": 0.9141391437595018, + "eval_recall": 0.9207317073170732, + "eval_runtime": 7.9654, + "eval_samples_per_second": 452.961, + "eval_steps_per_second": 3.641, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 6.660329199704529e-06, + "loss": 0.689, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 4.466145424607172e-06, + "loss": 0.6882, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9097351278852283, + "eval_loss": 0.8058528304100037, + "eval_precision": 0.9068820566264718, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.8269, + "eval_samples_per_second": 460.972, + "eval_steps_per_second": 3.705, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.698829767759866e-06, + "loss": 0.689, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9129586404079126, + "eval_loss": 0.8073987364768982, + "eval_precision": 0.9101675816658973, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.904, + "eval_samples_per_second": 456.475, + "eval_steps_per_second": 3.669, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.3686761801958123e-06, + "loss": 0.6906, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 4.834323078117404e-07, + "loss": 0.6904, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9180823883093728, + "eval_loss": 0.7987972497940063, + "eval_precision": 0.9157644622842541, + "eval_recall": 0.9210088691796009, + "eval_runtime": 7.6262, + "eval_samples_per_second": 473.105, + "eval_steps_per_second": 3.803, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 4.825436423562898e-08, + "loss": 0.6844, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.914649709467982, + "eval_loss": 0.8053018450737, + "eval_precision": 0.9128783765678965, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.4817, + "eval_samples_per_second": 482.244, + "eval_steps_per_second": 3.876, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.1005258860047984, + "learning_rate": 0.00015550588661619518, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-al5n9gw8/checkpoint-630/training_args.bin b/run-al5n9gw8/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..754ab77540cb341b3a02360eee4ae6cdba4d5f36 --- /dev/null +++ b/run-al5n9gw8/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d967ae94422bb13d311017101769fc2ff84863fedc3d4c2f3a883baa50c04d7f +size 4792 diff --git a/run-b202huyi/checkpoint-531/model.safetensors b/run-b202huyi/checkpoint-531/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7fedaccf30c1b0d3c1d1c80a9a45d5e6f842606 --- /dev/null +++ b/run-b202huyi/checkpoint-531/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:322f914796e8d02897b9f28ef8293abb5955c7c5b2a24b6aab64b55e042f9ee3 +size 198025308 diff --git a/run-b202huyi/checkpoint-531/optimizer.pt b/run-b202huyi/checkpoint-531/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..812ae750d4158ad3a8145cb5fdbd710c3cf96039 --- /dev/null +++ b/run-b202huyi/checkpoint-531/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb5e7285c7bd13c19204742c97c192e1d047d1246e2f95e23bbe145a45ff00d +size 395900602 diff --git a/run-b202huyi/checkpoint-531/rng_state.pth b/run-b202huyi/checkpoint-531/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9f50e97f6b8f8d895fe76d5cec6c2f434c3883a4 --- /dev/null +++ b/run-b202huyi/checkpoint-531/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7ebc248f8c573102043dc64b4a3df9b7043de65474577f599703b3d52b79074 +size 14244 diff --git a/run-b202huyi/checkpoint-531/scheduler.pt b/run-b202huyi/checkpoint-531/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e4372d4bc26135813bd2dc3b34a6b4423104fc7 --- /dev/null +++ b/run-b202huyi/checkpoint-531/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97991c4552b45e60e13dfa041a48cc3aed5da36415fe1372455ea474d7100b1e +size 1064 diff --git a/run-b202huyi/checkpoint-531/trainer_state.json b/run-b202huyi/checkpoint-531/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bed237d857a173fe319ec8579e535a0f401e8ac5 --- /dev/null +++ b/run-b202huyi/checkpoint-531/trainer_state.json @@ -0,0 +1,568 @@ +{ + "best_metric": 0.9154302063788887, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-b202huyi/checkpoint-531", + "epoch": 24.988235294117647, + "eval_steps": 500, + "global_step": 531, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 7.622594200922678e-05, + "loss": 1.306, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.1507611274719238, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.7162, + "eval_samples_per_second": 467.589, + "eval_steps_per_second": 3.758, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015245188401845355, + "loss": 0.9496, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00022867782602768033, + "loss": 0.8321, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8705654101995566, + "eval_f1": 0.8747878698361956, + "eval_loss": 0.9035532474517822, + "eval_precision": 0.8901413864467017, + "eval_recall": 0.8705654101995566, + "eval_runtime": 8.0467, + "eval_samples_per_second": 448.384, + "eval_steps_per_second": 3.604, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.0003049037680369071, + "loss": 0.7993, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8985587583148559, + "eval_f1": 0.8971642216071374, + "eval_loss": 0.8272557854652405, + "eval_precision": 0.8971272462358146, + "eval_recall": 0.8985587583148559, + "eval_runtime": 7.9119, + "eval_samples_per_second": 456.022, + "eval_steps_per_second": 3.665, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003811297100461339, + "loss": 0.7942, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00045735565205536066, + "loss": 0.7814, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8702882483370288, + "eval_f1": 0.8697141217610903, + "eval_loss": 0.867261528968811, + "eval_precision": 0.8733739134403936, + "eval_recall": 0.8702882483370288, + "eval_runtime": 7.9727, + "eval_samples_per_second": 452.544, + "eval_steps_per_second": 3.637, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0005335815940645874, + "loss": 0.7934, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0005566468647807583, + "loss": 0.7791, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.781319290465632, + "eval_f1": 0.80564856144356, + "eval_loss": 1.0475393533706665, + "eval_precision": 0.8813905782441916, + "eval_recall": 0.781319290465632, + "eval_runtime": 7.8594, + "eval_samples_per_second": 459.07, + "eval_steps_per_second": 3.69, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0005547148296018516, + "loss": 0.7831, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8736141906873615, + "eval_f1": 0.8482725188575098, + "eval_loss": 0.8844410181045532, + "eval_precision": 0.8546497663204456, + "eval_recall": 0.8736141906873615, + "eval_runtime": 8.0311, + "eval_samples_per_second": 449.256, + "eval_steps_per_second": 3.611, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.000551174050666409, + "loss": 0.7828, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0005460451516840884, + "loss": 0.7689, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8572616407982262, + "eval_f1": 0.8663172593273174, + "eval_loss": 0.9063207507133484, + "eval_precision": 0.8842961167434273, + "eval_recall": 0.8572616407982262, + "eval_runtime": 7.7217, + "eval_samples_per_second": 467.252, + "eval_steps_per_second": 3.756, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0005393580065683435, + "loss": 0.7651, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0005311515654320797, + "loss": 0.7625, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8655764966740577, + "eval_f1": 0.8273995583953322, + "eval_loss": 0.9005953669548035, + "eval_precision": 0.86145995381129, + "eval_recall": 0.8655764966740577, + "eval_runtime": 7.6951, + "eval_samples_per_second": 468.872, + "eval_steps_per_second": 3.769, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0005214736277178505, + "loss": 0.7524, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.5263303769401331, + "eval_f1": 0.6186340799487596, + "eval_loss": 1.374240756034851, + "eval_precision": 0.8648233593669671, + "eval_recall": 0.526330376940133, + "eval_runtime": 7.9849, + "eval_samples_per_second": 451.855, + "eval_steps_per_second": 3.632, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0005103805637840278, + "loss": 0.7548, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0004979369865686003, + "loss": 0.753, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8766629711751663, + "eval_f1": 0.8442640031957992, + "eval_loss": 0.9040635228157043, + "eval_precision": 0.8769568869259073, + "eval_recall": 0.8766629711751663, + "eval_runtime": 7.6152, + "eval_samples_per_second": 473.789, + "eval_steps_per_second": 3.808, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0004842153752430379, + "loss": 0.7425, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.7621951219512195, + "eval_f1": 0.8018004442781752, + "eval_loss": 1.033315658569336, + "eval_precision": 0.8786365541580559, + "eval_recall": 0.7621951219512195, + "eval_runtime": 8.0422, + "eval_samples_per_second": 448.635, + "eval_steps_per_second": 3.606, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0004692956530482931, + "loss": 0.7552, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0004532647217718914, + "loss": 0.7314, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8519955654101996, + "eval_f1": 0.8451645168591709, + "eval_loss": 0.9120221734046936, + "eval_precision": 0.844736620798598, + "eval_recall": 0.8519955654101996, + "eval_runtime": 7.6298, + "eval_samples_per_second": 472.88, + "eval_steps_per_second": 3.801, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0004362159555776028, + "loss": 0.7323, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00041824865713594936, + "loss": 0.7335, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8736141906873615, + "eval_f1": 0.8654423635376545, + "eval_loss": 0.893662691116333, + "eval_precision": 0.8861090394348778, + "eval_recall": 0.8736141906873615, + "eval_runtime": 7.5067, + "eval_samples_per_second": 480.637, + "eval_steps_per_second": 3.863, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00039946747922338344, + "loss": 0.7311, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9018847006651884, + "eval_f1": 0.8926249676308581, + "eval_loss": 0.8407385945320129, + "eval_precision": 0.8968758680868214, + "eval_recall": 0.9018847006651884, + "eval_runtime": 7.9818, + "eval_samples_per_second": 452.026, + "eval_steps_per_second": 3.633, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.000379981815159106, + "loss": 0.7295, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003599051616299989, + "loss": 0.7221, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9046563192904656, + "eval_f1": 0.8991840925465358, + "eval_loss": 0.8300164937973022, + "eval_precision": 0.8973972337072053, + "eval_recall": 0.9046563192904656, + "eval_runtime": 7.4255, + "eval_samples_per_second": 485.895, + "eval_steps_per_second": 3.905, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0003393544576149809, + "loss": 0.7187, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00031844940325930274, + "loss": 0.7173, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8996674057649667, + "eval_f1": 0.8876670378214859, + "eval_loss": 0.8413764238357544, + "eval_precision": 0.8874904772615283, + "eval_recall": 0.8996674057649667, + "eval_runtime": 7.894, + "eval_samples_per_second": 457.058, + "eval_steps_per_second": 3.674, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.000297311762666083, + "loss": 0.7182, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9085365853658537, + "eval_f1": 0.9025479943721222, + "eval_loss": 0.8175304532051086, + "eval_precision": 0.9001839678933262, + "eval_recall": 0.9085365853658537, + "eval_runtime": 8.11, + "eval_samples_per_second": 444.885, + "eval_steps_per_second": 3.576, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00027606465466606427, + "loss": 0.7119, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.0002548318356965879, + "loss": 0.7111, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.8956675043328327, + "eval_loss": 0.834709644317627, + "eval_precision": 0.8969026735174765, + "eval_recall": 0.907150776053215, + "eval_runtime": 8.0817, + "eval_samples_per_second": 446.442, + "eval_steps_per_second": 3.588, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00023373697896675015, + "loss": 0.707, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00021290295410733147, + "loss": 0.7074, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9076422188009077, + "eval_loss": 0.8188020586967468, + "eval_precision": 0.9074522228362695, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.9545, + "eval_samples_per_second": 453.58, + "eval_steps_per_second": 3.646, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.0001924511115012679, + "loss": 0.6994, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8741685144124168, + "eval_f1": 0.8814495016039771, + "eval_loss": 0.869800865650177, + "eval_precision": 0.8959392789861267, + "eval_recall": 0.8741685144124168, + "eval_runtime": 7.5003, + "eval_samples_per_second": 481.045, + "eval_steps_per_second": 3.866, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00017250057546317157, + "loss": 0.7017, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.00015316755038486505, + "loss": 0.6966, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.912882639706859, + "eval_loss": 0.8059185743331909, + "eval_precision": 0.9092735705553316, + "eval_recall": 0.9176829268292683, + "eval_runtime": 8.2416, + "eval_samples_per_second": 437.777, + "eval_steps_per_second": 3.519, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.00013456464388837003, + "loss": 0.6966, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.90868167505349, + "eval_loss": 0.8128464818000793, + "eval_precision": 0.9057514711981032, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.023, + "eval_samples_per_second": 449.706, + "eval_steps_per_second": 3.615, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00011680021092873271, + "loss": 0.6959, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 9.997772266703892e-05, + "loss": 0.6958, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.910454495266337, + "eval_loss": 0.8116774559020996, + "eval_precision": 0.9097606876991597, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.7383, + "eval_samples_per_second": 466.251, + "eval_steps_per_second": 3.748, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 8.419516378969772e-05, + "loss": 0.6917, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 6.954446178438377e-05, + "loss": 0.6899, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9127751879794206, + "eval_loss": 0.8156436085700989, + "eval_precision": 0.9121046400242679, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.9642, + "eval_samples_per_second": 453.03, + "eval_steps_per_second": 3.641, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 5.6110951496890165e-05, + "loss": 0.6927, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9154302063788887, + "eval_loss": 0.8100875616073608, + "eval_precision": 0.9125550689910698, + "eval_recall": 0.9196230598669624, + "eval_runtime": 8.0179, + "eval_samples_per_second": 449.996, + "eval_steps_per_second": 3.617, + "step": 531 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4816270596680365, + "learning_rate": 0.0005570357300674264, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-b202huyi/checkpoint-531/training_args.bin b/run-b202huyi/checkpoint-531/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..60dbcb448d8f9483ee2200f74a2c8a3a00e55cb8 --- /dev/null +++ b/run-b202huyi/checkpoint-531/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ead48f12638ab9472e292af87e3c1c5685589e718fe03636d3eecabfef109a20 +size 4792 diff --git a/run-b202huyi/checkpoint-630/model.safetensors b/run-b202huyi/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b2a86b09b6cd598112977db5cee9de0aefcc44ca --- /dev/null +++ b/run-b202huyi/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a458ec9f8dc55f8e4e887e386a9f95d53cc43ed618123257bc8495cce131e8a +size 198025308 diff --git a/run-b202huyi/checkpoint-630/optimizer.pt b/run-b202huyi/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..276e1e771fca10e79b0d93d1de45672ce7ea3268 --- /dev/null +++ b/run-b202huyi/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd5263711238b6eb301fcf2e1ebdb1b96037462a0e0e07759204c6fed1a20791 +size 395900602 diff --git a/run-b202huyi/checkpoint-630/rng_state.pth b/run-b202huyi/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-b202huyi/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-b202huyi/checkpoint-630/scheduler.pt b/run-b202huyi/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..54c47f64b14b687a23aa85490f8e65042dafabd5 --- /dev/null +++ b/run-b202huyi/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16df27f7097286b2608f4b0ec1dfa23ed30e36b1dce9448f21e6789ad651a725 +size 1064 diff --git a/run-b202huyi/checkpoint-630/trainer_state.json b/run-b202huyi/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..36d3da41b3d5a11214ea7cce12cd8e95ab9fb2f1 --- /dev/null +++ b/run-b202huyi/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9154302063788887, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-b202huyi/checkpoint-531", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 7.622594200922678e-05, + "loss": 1.306, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.1507611274719238, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.7162, + "eval_samples_per_second": 467.589, + "eval_steps_per_second": 3.758, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015245188401845355, + "loss": 0.9496, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00022867782602768033, + "loss": 0.8321, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8705654101995566, + "eval_f1": 0.8747878698361956, + "eval_loss": 0.9035532474517822, + "eval_precision": 0.8901413864467017, + "eval_recall": 0.8705654101995566, + "eval_runtime": 8.0467, + "eval_samples_per_second": 448.384, + "eval_steps_per_second": 3.604, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.0003049037680369071, + "loss": 0.7993, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8985587583148559, + "eval_f1": 0.8971642216071374, + "eval_loss": 0.8272557854652405, + "eval_precision": 0.8971272462358146, + "eval_recall": 0.8985587583148559, + "eval_runtime": 7.9119, + "eval_samples_per_second": 456.022, + "eval_steps_per_second": 3.665, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003811297100461339, + "loss": 0.7942, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00045735565205536066, + "loss": 0.7814, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8702882483370288, + "eval_f1": 0.8697141217610903, + "eval_loss": 0.867261528968811, + "eval_precision": 0.8733739134403936, + "eval_recall": 0.8702882483370288, + "eval_runtime": 7.9727, + "eval_samples_per_second": 452.544, + "eval_steps_per_second": 3.637, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0005335815940645874, + "loss": 0.7934, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0005566468647807583, + "loss": 0.7791, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.781319290465632, + "eval_f1": 0.80564856144356, + "eval_loss": 1.0475393533706665, + "eval_precision": 0.8813905782441916, + "eval_recall": 0.781319290465632, + "eval_runtime": 7.8594, + "eval_samples_per_second": 459.07, + "eval_steps_per_second": 3.69, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0005547148296018516, + "loss": 0.7831, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8736141906873615, + "eval_f1": 0.8482725188575098, + "eval_loss": 0.8844410181045532, + "eval_precision": 0.8546497663204456, + "eval_recall": 0.8736141906873615, + "eval_runtime": 8.0311, + "eval_samples_per_second": 449.256, + "eval_steps_per_second": 3.611, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.000551174050666409, + "loss": 0.7828, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0005460451516840884, + "loss": 0.7689, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8572616407982262, + "eval_f1": 0.8663172593273174, + "eval_loss": 0.9063207507133484, + "eval_precision": 0.8842961167434273, + "eval_recall": 0.8572616407982262, + "eval_runtime": 7.7217, + "eval_samples_per_second": 467.252, + "eval_steps_per_second": 3.756, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0005393580065683435, + "loss": 0.7651, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0005311515654320797, + "loss": 0.7625, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8655764966740577, + "eval_f1": 0.8273995583953322, + "eval_loss": 0.9005953669548035, + "eval_precision": 0.86145995381129, + "eval_recall": 0.8655764966740577, + "eval_runtime": 7.6951, + "eval_samples_per_second": 468.872, + "eval_steps_per_second": 3.769, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0005214736277178505, + "loss": 0.7524, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.5263303769401331, + "eval_f1": 0.6186340799487596, + "eval_loss": 1.374240756034851, + "eval_precision": 0.8648233593669671, + "eval_recall": 0.526330376940133, + "eval_runtime": 7.9849, + "eval_samples_per_second": 451.855, + "eval_steps_per_second": 3.632, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0005103805637840278, + "loss": 0.7548, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0004979369865686003, + "loss": 0.753, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8766629711751663, + "eval_f1": 0.8442640031957992, + "eval_loss": 0.9040635228157043, + "eval_precision": 0.8769568869259073, + "eval_recall": 0.8766629711751663, + "eval_runtime": 7.6152, + "eval_samples_per_second": 473.789, + "eval_steps_per_second": 3.808, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0004842153752430379, + "loss": 0.7425, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.7621951219512195, + "eval_f1": 0.8018004442781752, + "eval_loss": 1.033315658569336, + "eval_precision": 0.8786365541580559, + "eval_recall": 0.7621951219512195, + "eval_runtime": 8.0422, + "eval_samples_per_second": 448.635, + "eval_steps_per_second": 3.606, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0004692956530482931, + "loss": 0.7552, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0004532647217718914, + "loss": 0.7314, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8519955654101996, + "eval_f1": 0.8451645168591709, + "eval_loss": 0.9120221734046936, + "eval_precision": 0.844736620798598, + "eval_recall": 0.8519955654101996, + "eval_runtime": 7.6298, + "eval_samples_per_second": 472.88, + "eval_steps_per_second": 3.801, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0004362159555776028, + "loss": 0.7323, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00041824865713594936, + "loss": 0.7335, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8736141906873615, + "eval_f1": 0.8654423635376545, + "eval_loss": 0.893662691116333, + "eval_precision": 0.8861090394348778, + "eval_recall": 0.8736141906873615, + "eval_runtime": 7.5067, + "eval_samples_per_second": 480.637, + "eval_steps_per_second": 3.863, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00039946747922338344, + "loss": 0.7311, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9018847006651884, + "eval_f1": 0.8926249676308581, + "eval_loss": 0.8407385945320129, + "eval_precision": 0.8968758680868214, + "eval_recall": 0.9018847006651884, + "eval_runtime": 7.9818, + "eval_samples_per_second": 452.026, + "eval_steps_per_second": 3.633, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.000379981815159106, + "loss": 0.7295, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003599051616299989, + "loss": 0.7221, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9046563192904656, + "eval_f1": 0.8991840925465358, + "eval_loss": 0.8300164937973022, + "eval_precision": 0.8973972337072053, + "eval_recall": 0.9046563192904656, + "eval_runtime": 7.4255, + "eval_samples_per_second": 485.895, + "eval_steps_per_second": 3.905, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0003393544576149809, + "loss": 0.7187, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00031844940325930274, + "loss": 0.7173, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8996674057649667, + "eval_f1": 0.8876670378214859, + "eval_loss": 0.8413764238357544, + "eval_precision": 0.8874904772615283, + "eval_recall": 0.8996674057649667, + "eval_runtime": 7.894, + "eval_samples_per_second": 457.058, + "eval_steps_per_second": 3.674, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.000297311762666083, + "loss": 0.7182, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9085365853658537, + "eval_f1": 0.9025479943721222, + "eval_loss": 0.8175304532051086, + "eval_precision": 0.9001839678933262, + "eval_recall": 0.9085365853658537, + "eval_runtime": 8.11, + "eval_samples_per_second": 444.885, + "eval_steps_per_second": 3.576, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00027606465466606427, + "loss": 0.7119, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.0002548318356965879, + "loss": 0.7111, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.8956675043328327, + "eval_loss": 0.834709644317627, + "eval_precision": 0.8969026735174765, + "eval_recall": 0.907150776053215, + "eval_runtime": 8.0817, + "eval_samples_per_second": 446.442, + "eval_steps_per_second": 3.588, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00023373697896675015, + "loss": 0.707, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00021290295410733147, + "loss": 0.7074, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9076422188009077, + "eval_loss": 0.8188020586967468, + "eval_precision": 0.9074522228362695, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.9545, + "eval_samples_per_second": 453.58, + "eval_steps_per_second": 3.646, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.0001924511115012679, + "loss": 0.6994, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8741685144124168, + "eval_f1": 0.8814495016039771, + "eval_loss": 0.869800865650177, + "eval_precision": 0.8959392789861267, + "eval_recall": 0.8741685144124168, + "eval_runtime": 7.5003, + "eval_samples_per_second": 481.045, + "eval_steps_per_second": 3.866, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00017250057546317157, + "loss": 0.7017, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.00015316755038486505, + "loss": 0.6966, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.912882639706859, + "eval_loss": 0.8059185743331909, + "eval_precision": 0.9092735705553316, + "eval_recall": 0.9176829268292683, + "eval_runtime": 8.2416, + "eval_samples_per_second": 437.777, + "eval_steps_per_second": 3.519, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.00013456464388837003, + "loss": 0.6966, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.90868167505349, + "eval_loss": 0.8128464818000793, + "eval_precision": 0.9057514711981032, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.023, + "eval_samples_per_second": 449.706, + "eval_steps_per_second": 3.615, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00011680021092873271, + "loss": 0.6959, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 9.997772266703892e-05, + "loss": 0.6958, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.910454495266337, + "eval_loss": 0.8116774559020996, + "eval_precision": 0.9097606876991597, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.7383, + "eval_samples_per_second": 466.251, + "eval_steps_per_second": 3.748, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 8.419516378969772e-05, + "loss": 0.6917, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 6.954446178438377e-05, + "loss": 0.6899, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9127751879794206, + "eval_loss": 0.8156436085700989, + "eval_precision": 0.9121046400242679, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.9642, + "eval_samples_per_second": 453.03, + "eval_steps_per_second": 3.641, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 5.6110951496890165e-05, + "loss": 0.6927, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9154302063788887, + "eval_loss": 0.8100875616073608, + "eval_precision": 0.9125550689910698, + "eval_recall": 0.9196230598669624, + "eval_runtime": 8.0179, + "eval_samples_per_second": 449.996, + "eval_steps_per_second": 3.617, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 4.397287808764835e-05, + "loss": 0.6875, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 3.320094128300779e-05, + "loss": 0.6894, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9150512239350818, + "eval_loss": 0.8058057427406311, + "eval_precision": 0.9134810970829045, + "eval_recall": 0.9185144124168514, + "eval_runtime": 8.1297, + "eval_samples_per_second": 443.806, + "eval_steps_per_second": 3.567, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 2.3857883575839036e-05, + "loss": 0.6902, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 1.5998124774038356e-05, + "loss": 0.6867, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9089599373807715, + "eval_loss": 0.8141684532165527, + "eval_precision": 0.9071477479856133, + "eval_recall": 0.9115853658536586, + "eval_runtime": 7.908, + "eval_samples_per_second": 456.245, + "eval_steps_per_second": 3.667, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 9.667445025552195e-06, + "loss": 0.6897, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9081586994800988, + "eval_loss": 0.8139289617538452, + "eval_precision": 0.9048885760429236, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.4976, + "eval_samples_per_second": 481.218, + "eval_steps_per_second": 3.868, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 4.902718166180794e-06, + "loss": 0.6897, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.7316969433107521e-06, + "loss": 0.6892, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9110720414706266, + "eval_loss": 0.8100309371948242, + "eval_precision": 0.9087990919916861, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.4672, + "eval_samples_per_second": 483.177, + "eval_steps_per_second": 3.884, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 1.7285136656771258e-07, + "loss": 0.6838, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9125149420134374, + "eval_loss": 0.816099226474762, + "eval_precision": 0.9111411942591741, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.2759, + "eval_samples_per_second": 435.962, + "eval_steps_per_second": 3.504, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4816270596680365, + "learning_rate": 0.0005570357300674264, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-b202huyi/checkpoint-630/training_args.bin b/run-b202huyi/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..60dbcb448d8f9483ee2200f74a2c8a3a00e55cb8 --- /dev/null +++ b/run-b202huyi/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ead48f12638ab9472e292af87e3c1c5685589e718fe03636d3eecabfef109a20 +size 4792 diff --git a/run-b3iy7cum/checkpoint-573/model.safetensors b/run-b3iy7cum/checkpoint-573/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ebb024bb4fa962a0627e0a57b9d1a9bc069fb2a6 --- /dev/null +++ b/run-b3iy7cum/checkpoint-573/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f042a7a4b3277366a6a46a8a1eb38914edbb6b4a8163dc8a9a8ad24ef7d6004 +size 198025308 diff --git a/run-b3iy7cum/checkpoint-573/optimizer.pt b/run-b3iy7cum/checkpoint-573/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..80a2ea06ec03d19c1288e1edde3f0481875ef078 --- /dev/null +++ b/run-b3iy7cum/checkpoint-573/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a45b1b3bd29e433f13a2f4fdb352e03b8f2e3d0b09938705e50fcb9fa2d422b1 +size 395900602 diff --git a/run-b3iy7cum/checkpoint-573/rng_state.pth b/run-b3iy7cum/checkpoint-573/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e8713f9caaf617efce0d3935618a93ad2f5f391 --- /dev/null +++ b/run-b3iy7cum/checkpoint-573/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9102bb312b12c2313ea7585eb813beef5c548592778aaea4ab0516e14ecd38e5 +size 14244 diff --git a/run-b3iy7cum/checkpoint-573/scheduler.pt b/run-b3iy7cum/checkpoint-573/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f4e22564cd3ff3fe2519046af9e7391d24b7e33 --- /dev/null +++ b/run-b3iy7cum/checkpoint-573/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3b3745362d14852b69e3a4dbae4df08214a48ef3de34d75c0fa24c5e19e1bbf +size 1064 diff --git a/run-b3iy7cum/checkpoint-573/trainer_state.json b/run-b3iy7cum/checkpoint-573/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2e931c84aa35f3628805f75cd3910cd065a11299 --- /dev/null +++ b/run-b3iy7cum/checkpoint-573/trainer_state.json @@ -0,0 +1,616 @@ +{ + "best_metric": 0.9167935491245766, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-b3iy7cum/checkpoint-573", + "epoch": 26.96470588235294, + "eval_steps": 500, + "global_step": 573, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.145999548445658e-05, + "loss": 1.4891, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8264966740576497, + "eval_f1": 0.7501969507313436, + "eval_loss": 1.1241339445114136, + "eval_precision": 0.6867941503359228, + "eval_recall": 0.8264966740576497, + "eval_runtime": 8.3443, + "eval_samples_per_second": 432.393, + "eval_steps_per_second": 3.475, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 2.291999096891316e-05, + "loss": 1.203, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 3.437998645336974e-05, + "loss": 0.9608, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8580931263858093, + "eval_f1": 0.8145928296837187, + "eval_loss": 0.9374946355819702, + "eval_precision": 0.8448013215645496, + "eval_recall": 0.8580931263858093, + "eval_runtime": 8.0911, + "eval_samples_per_second": 445.922, + "eval_steps_per_second": 3.584, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 4.583998193782632e-05, + "loss": 0.8784, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.8895179752774438, + "eval_loss": 0.8623562455177307, + "eval_precision": 0.8906441207781222, + "eval_recall": 0.8991130820399114, + "eval_runtime": 8.4025, + "eval_samples_per_second": 429.396, + "eval_steps_per_second": 3.451, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 5.72999774222829e-05, + "loss": 0.8464, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 6.875997290673948e-05, + "loss": 0.8095, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9027018244054482, + "eval_loss": 0.8148450255393982, + "eval_precision": 0.8991916457732703, + "eval_recall": 0.9104767184035477, + "eval_runtime": 8.0712, + "eval_samples_per_second": 447.022, + "eval_steps_per_second": 3.593, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 8.021996839119605e-05, + "loss": 0.8032, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 9.167996387565263e-05, + "loss": 0.7868, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8941241685144125, + "eval_f1": 0.8942512162931542, + "eval_loss": 0.8299208879470825, + "eval_precision": 0.8976682659072109, + "eval_recall": 0.8941241685144125, + "eval_runtime": 8.5166, + "eval_samples_per_second": 423.645, + "eval_steps_per_second": 3.405, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00010313995936010922, + "loss": 0.7699, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9039777674228808, + "eval_loss": 0.8073028326034546, + "eval_precision": 0.9046462056466061, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.3158, + "eval_samples_per_second": 433.874, + "eval_steps_per_second": 3.487, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00011105654051929996, + "loss": 0.7726, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00011076228539741515, + "loss": 0.7648, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9101995565410199, + "eval_f1": 0.9015120421169303, + "eval_loss": 0.809033989906311, + "eval_precision": 0.9001669239906706, + "eval_recall": 0.9101995565410199, + "eval_runtime": 8.2613, + "eval_samples_per_second": 436.737, + "eval_steps_per_second": 3.51, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00011010559833430576, + "loss": 0.7588, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00010909078902143868, + "loss": 0.7536, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8977272727272727, + "eval_f1": 0.8965820315016871, + "eval_loss": 0.830237627029419, + "eval_precision": 0.8969960760700141, + "eval_recall": 0.8977272727272727, + "eval_runtime": 8.3437, + "eval_samples_per_second": 432.422, + "eval_steps_per_second": 3.476, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00010772451742713356, + "loss": 0.7487, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9027678141279665, + "eval_loss": 0.8092654347419739, + "eval_precision": 0.9034965534189926, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.939, + "eval_samples_per_second": 454.467, + "eval_steps_per_second": 3.653, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00010601575008866862, + "loss": 0.7452, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00010397570126688627, + "loss": 0.7391, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9083147990042801, + "eval_loss": 0.8062973022460938, + "eval_precision": 0.9053586637651523, + "eval_recall": 0.9124168514412417, + "eval_runtime": 8.1303, + "eval_samples_per_second": 443.77, + "eval_steps_per_second": 3.567, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00010161775934948784, + "loss": 0.7323, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8957871396895787, + "eval_f1": 0.8972356143725164, + "eval_loss": 0.8380410075187683, + "eval_precision": 0.9014623680260581, + "eval_recall": 0.8957871396895787, + "eval_runtime": 8.4281, + "eval_samples_per_second": 428.09, + "eval_steps_per_second": 3.441, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 9.895739898601618e-05, + "loss": 0.7322, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 9.601207953116468e-05, + "loss": 0.7263, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9031716798626874, + "eval_loss": 0.8091875910758972, + "eval_precision": 0.9008695769618928, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.3906, + "eval_samples_per_second": 430.007, + "eval_steps_per_second": 3.456, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 9.280113046290703e-05, + "loss": 0.7257, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 8.934562452742266e-05, + "loss": 0.719, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9004988913525499, + "eval_f1": 0.9012616511377035, + "eval_loss": 0.8300186395645142, + "eval_precision": 0.904676266403974, + "eval_recall": 0.9004988913525499, + "eval_runtime": 8.3008, + "eval_samples_per_second": 434.657, + "eval_steps_per_second": 3.494, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 8.566823944333993e-05, + "loss": 0.7238, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9093928094595245, + "eval_loss": 0.8134934902191162, + "eval_precision": 0.908231543147672, + "eval_recall": 0.9154656319290465, + "eval_runtime": 8.4202, + "eval_samples_per_second": 428.493, + "eval_steps_per_second": 3.444, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 8.17931090729013e-05, + "loss": 0.7151, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 7.774566503678105e-05, + "loss": 0.7157, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9079822616407982, + "eval_f1": 0.9070385471707949, + "eval_loss": 0.8100745677947998, + "eval_precision": 0.9078809714071604, + "eval_recall": 0.9079822616407982, + "eval_runtime": 8.4146, + "eval_samples_per_second": 428.781, + "eval_steps_per_second": 3.446, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 7.355246981200254e-05, + "loss": 0.7129, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 6.92410424082959e-05, + "loss": 0.7128, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9099592994489757, + "eval_loss": 0.805757999420166, + "eval_precision": 0.9086141248342123, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.2329, + "eval_samples_per_second": 438.243, + "eval_steps_per_second": 3.522, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 6.483967776694429e-05, + "loss": 0.7098, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8999445676274944, + "eval_f1": 0.9012826001484258, + "eval_loss": 0.8254275321960449, + "eval_precision": 0.9052236238978434, + "eval_recall": 0.8999445676274944, + "eval_runtime": 8.141, + "eval_samples_per_second": 443.19, + "eval_steps_per_second": 3.562, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 6.037726106736306e-05, + "loss": 0.7092, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 5.588307816007743e-05, + "loss": 0.7085, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9112120736730757, + "eval_loss": 0.8126747012138367, + "eval_precision": 0.9112515657992718, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.3167, + "eval_samples_per_second": 433.828, + "eval_steps_per_second": 3.487, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 5.138662337018444e-05, + "loss": 0.7082, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 4.691740593264249e-05, + "loss": 0.7018, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.908592465781928, + "eval_loss": 0.8167337775230408, + "eval_precision": 0.9105959610134828, + "eval_recall": 0.9107538802660754, + "eval_runtime": 8.111, + "eval_samples_per_second": 444.828, + "eval_steps_per_second": 3.575, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 4.250475632971089e-05, + "loss": 0.7072, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9112752588759019, + "eval_loss": 0.8070755004882812, + "eval_precision": 0.909058527231022, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.2282, + "eval_samples_per_second": 438.493, + "eval_steps_per_second": 3.524, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 3.817763380150352e-05, + "loss": 0.7005, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.3964436292922636e-05, + "loss": 0.6982, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9104007348891044, + "eval_loss": 0.8100107312202454, + "eval_precision": 0.9078858103761545, + "eval_recall": 0.9151884700665188, + "eval_runtime": 7.8169, + "eval_samples_per_second": 461.564, + "eval_steps_per_second": 3.71, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 2.989281408424826e-05, + "loss": 0.7012, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9110907829676643, + "eval_loss": 0.8087267875671387, + "eval_precision": 0.9101191121629745, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.1475, + "eval_samples_per_second": 442.838, + "eval_steps_per_second": 3.559, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.5989488328484635e-05, + "loss": 0.6992, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.2280075686363027e-05, + "loss": 0.6966, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9089969468120279, + "eval_loss": 0.8119024634361267, + "eval_precision": 0.9068283756210996, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.9277, + "eval_samples_per_second": 455.112, + "eval_steps_per_second": 3.658, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.8788920209881947e-05, + "loss": 0.6977, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.5538933577696146e-05, + "loss": 0.6967, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9147011515261185, + "eval_loss": 0.8009812831878662, + "eval_precision": 0.9129749159822712, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.0475, + "eval_samples_per_second": 448.337, + "eval_steps_per_second": 3.604, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.2551444730854135e-05, + "loss": 0.6952, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.911643296210408, + "eval_loss": 0.8082801699638367, + "eval_precision": 0.9109842169566414, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.3633, + "eval_samples_per_second": 431.41, + "eval_steps_per_second": 3.468, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 9.846059895691212e-06, + "loss": 0.6956, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 7.440533912516941e-06, + "loss": 0.6934, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9212860310421286, + "eval_f1": 0.9167130126302919, + "eval_loss": 0.7990955710411072, + "eval_precision": 0.9149227601944712, + "eval_recall": 0.9212860310421286, + "eval_runtime": 8.059, + "eval_samples_per_second": 447.697, + "eval_steps_per_second": 3.598, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 5.350653714538273e-06, + "loss": 0.6926, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.590134721720188e-06, + "loss": 0.6898, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9167935491245766, + "eval_loss": 0.8045080304145813, + "eval_precision": 0.9146813451558005, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.1548, + "eval_samples_per_second": 442.44, + "eval_steps_per_second": 3.556, + "step": 573 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.04365369600931074, + "learning_rate": 0.00011107380238780992, + "metric": "eval/loss", + "weight_decay": 0.012659976334904684 + } +} diff --git a/run-b3iy7cum/checkpoint-573/training_args.bin b/run-b3iy7cum/checkpoint-573/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9ecb904312f636f3e7259ae62f570ff25b0fab53 --- /dev/null +++ b/run-b3iy7cum/checkpoint-573/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb3721fdb654f9a5b927b31a22fe1255482d2eb9e12e01d3c683592f8a29b80 +size 4792 diff --git a/run-b3iy7cum/checkpoint-630/model.safetensors b/run-b3iy7cum/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10ae9665782c5092c4cf6c3cba1ddf253e84f605 --- /dev/null +++ b/run-b3iy7cum/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a22a478da7785d50cdf65b3798cc2f06d724e54d54539ed8de1da88c5cb47f0 +size 198025308 diff --git a/run-b3iy7cum/checkpoint-630/optimizer.pt b/run-b3iy7cum/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c17e0500890c05b1951cea199a4cbf8b79c926f --- /dev/null +++ b/run-b3iy7cum/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac2fb1d43ef9b5d873167ffa32b082f374610cf4c681e0a8b40c6e526c20bee7 +size 395900602 diff --git a/run-b3iy7cum/checkpoint-630/rng_state.pth b/run-b3iy7cum/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-b3iy7cum/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-b3iy7cum/checkpoint-630/scheduler.pt b/run-b3iy7cum/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8be1844a854abaded3046f9ef057961d42e62d60 --- /dev/null +++ b/run-b3iy7cum/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf4af22c23245afb26103f156c43b0492f434eac99b325726c3b652758e8406 +size 1064 diff --git a/run-b3iy7cum/checkpoint-630/trainer_state.json b/run-b3iy7cum/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..446f0a8af0ab61de3c1322c4420427dd190be890 --- /dev/null +++ b/run-b3iy7cum/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9167935491245766, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-b3iy7cum/checkpoint-573", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.145999548445658e-05, + "loss": 1.4891, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8264966740576497, + "eval_f1": 0.7501969507313436, + "eval_loss": 1.1241339445114136, + "eval_precision": 0.6867941503359228, + "eval_recall": 0.8264966740576497, + "eval_runtime": 8.3443, + "eval_samples_per_second": 432.393, + "eval_steps_per_second": 3.475, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 2.291999096891316e-05, + "loss": 1.203, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 3.437998645336974e-05, + "loss": 0.9608, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8580931263858093, + "eval_f1": 0.8145928296837187, + "eval_loss": 0.9374946355819702, + "eval_precision": 0.8448013215645496, + "eval_recall": 0.8580931263858093, + "eval_runtime": 8.0911, + "eval_samples_per_second": 445.922, + "eval_steps_per_second": 3.584, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 4.583998193782632e-05, + "loss": 0.8784, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.8895179752774438, + "eval_loss": 0.8623562455177307, + "eval_precision": 0.8906441207781222, + "eval_recall": 0.8991130820399114, + "eval_runtime": 8.4025, + "eval_samples_per_second": 429.396, + "eval_steps_per_second": 3.451, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 5.72999774222829e-05, + "loss": 0.8464, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 6.875997290673948e-05, + "loss": 0.8095, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9027018244054482, + "eval_loss": 0.8148450255393982, + "eval_precision": 0.8991916457732703, + "eval_recall": 0.9104767184035477, + "eval_runtime": 8.0712, + "eval_samples_per_second": 447.022, + "eval_steps_per_second": 3.593, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 8.021996839119605e-05, + "loss": 0.8032, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 9.167996387565263e-05, + "loss": 0.7868, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8941241685144125, + "eval_f1": 0.8942512162931542, + "eval_loss": 0.8299208879470825, + "eval_precision": 0.8976682659072109, + "eval_recall": 0.8941241685144125, + "eval_runtime": 8.5166, + "eval_samples_per_second": 423.645, + "eval_steps_per_second": 3.405, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00010313995936010922, + "loss": 0.7699, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9039777674228808, + "eval_loss": 0.8073028326034546, + "eval_precision": 0.9046462056466061, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.3158, + "eval_samples_per_second": 433.874, + "eval_steps_per_second": 3.487, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00011105654051929996, + "loss": 0.7726, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00011076228539741515, + "loss": 0.7648, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9101995565410199, + "eval_f1": 0.9015120421169303, + "eval_loss": 0.809033989906311, + "eval_precision": 0.9001669239906706, + "eval_recall": 0.9101995565410199, + "eval_runtime": 8.2613, + "eval_samples_per_second": 436.737, + "eval_steps_per_second": 3.51, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00011010559833430576, + "loss": 0.7588, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00010909078902143868, + "loss": 0.7536, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8977272727272727, + "eval_f1": 0.8965820315016871, + "eval_loss": 0.830237627029419, + "eval_precision": 0.8969960760700141, + "eval_recall": 0.8977272727272727, + "eval_runtime": 8.3437, + "eval_samples_per_second": 432.422, + "eval_steps_per_second": 3.476, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00010772451742713356, + "loss": 0.7487, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9027678141279665, + "eval_loss": 0.8092654347419739, + "eval_precision": 0.9034965534189926, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.939, + "eval_samples_per_second": 454.467, + "eval_steps_per_second": 3.653, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00010601575008866862, + "loss": 0.7452, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00010397570126688627, + "loss": 0.7391, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9083147990042801, + "eval_loss": 0.8062973022460938, + "eval_precision": 0.9053586637651523, + "eval_recall": 0.9124168514412417, + "eval_runtime": 8.1303, + "eval_samples_per_second": 443.77, + "eval_steps_per_second": 3.567, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00010161775934948784, + "loss": 0.7323, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8957871396895787, + "eval_f1": 0.8972356143725164, + "eval_loss": 0.8380410075187683, + "eval_precision": 0.9014623680260581, + "eval_recall": 0.8957871396895787, + "eval_runtime": 8.4281, + "eval_samples_per_second": 428.09, + "eval_steps_per_second": 3.441, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 9.895739898601618e-05, + "loss": 0.7322, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 9.601207953116468e-05, + "loss": 0.7263, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9031716798626874, + "eval_loss": 0.8091875910758972, + "eval_precision": 0.9008695769618928, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.3906, + "eval_samples_per_second": 430.007, + "eval_steps_per_second": 3.456, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 9.280113046290703e-05, + "loss": 0.7257, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 8.934562452742266e-05, + "loss": 0.719, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9004988913525499, + "eval_f1": 0.9012616511377035, + "eval_loss": 0.8300186395645142, + "eval_precision": 0.904676266403974, + "eval_recall": 0.9004988913525499, + "eval_runtime": 8.3008, + "eval_samples_per_second": 434.657, + "eval_steps_per_second": 3.494, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 8.566823944333993e-05, + "loss": 0.7238, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9093928094595245, + "eval_loss": 0.8134934902191162, + "eval_precision": 0.908231543147672, + "eval_recall": 0.9154656319290465, + "eval_runtime": 8.4202, + "eval_samples_per_second": 428.493, + "eval_steps_per_second": 3.444, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 8.17931090729013e-05, + "loss": 0.7151, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 7.774566503678105e-05, + "loss": 0.7157, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9079822616407982, + "eval_f1": 0.9070385471707949, + "eval_loss": 0.8100745677947998, + "eval_precision": 0.9078809714071604, + "eval_recall": 0.9079822616407982, + "eval_runtime": 8.4146, + "eval_samples_per_second": 428.781, + "eval_steps_per_second": 3.446, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 7.355246981200254e-05, + "loss": 0.7129, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 6.92410424082959e-05, + "loss": 0.7128, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9099592994489757, + "eval_loss": 0.805757999420166, + "eval_precision": 0.9086141248342123, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.2329, + "eval_samples_per_second": 438.243, + "eval_steps_per_second": 3.522, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 6.483967776694429e-05, + "loss": 0.7098, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8999445676274944, + "eval_f1": 0.9012826001484258, + "eval_loss": 0.8254275321960449, + "eval_precision": 0.9052236238978434, + "eval_recall": 0.8999445676274944, + "eval_runtime": 8.141, + "eval_samples_per_second": 443.19, + "eval_steps_per_second": 3.562, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 6.037726106736306e-05, + "loss": 0.7092, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 5.588307816007743e-05, + "loss": 0.7085, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9112120736730757, + "eval_loss": 0.8126747012138367, + "eval_precision": 0.9112515657992718, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.3167, + "eval_samples_per_second": 433.828, + "eval_steps_per_second": 3.487, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 5.138662337018444e-05, + "loss": 0.7082, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 4.691740593264249e-05, + "loss": 0.7018, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.908592465781928, + "eval_loss": 0.8167337775230408, + "eval_precision": 0.9105959610134828, + "eval_recall": 0.9107538802660754, + "eval_runtime": 8.111, + "eval_samples_per_second": 444.828, + "eval_steps_per_second": 3.575, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 4.250475632971089e-05, + "loss": 0.7072, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9112752588759019, + "eval_loss": 0.8070755004882812, + "eval_precision": 0.909058527231022, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.2282, + "eval_samples_per_second": 438.493, + "eval_steps_per_second": 3.524, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 3.817763380150352e-05, + "loss": 0.7005, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.3964436292922636e-05, + "loss": 0.6982, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9104007348891044, + "eval_loss": 0.8100107312202454, + "eval_precision": 0.9078858103761545, + "eval_recall": 0.9151884700665188, + "eval_runtime": 7.8169, + "eval_samples_per_second": 461.564, + "eval_steps_per_second": 3.71, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 2.989281408424826e-05, + "loss": 0.7012, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9110907829676643, + "eval_loss": 0.8087267875671387, + "eval_precision": 0.9101191121629745, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.1475, + "eval_samples_per_second": 442.838, + "eval_steps_per_second": 3.559, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.5989488328484635e-05, + "loss": 0.6992, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.2280075686363027e-05, + "loss": 0.6966, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9089969468120279, + "eval_loss": 0.8119024634361267, + "eval_precision": 0.9068283756210996, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.9277, + "eval_samples_per_second": 455.112, + "eval_steps_per_second": 3.658, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.8788920209881947e-05, + "loss": 0.6977, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.5538933577696146e-05, + "loss": 0.6967, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9147011515261185, + "eval_loss": 0.8009812831878662, + "eval_precision": 0.9129749159822712, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.0475, + "eval_samples_per_second": 448.337, + "eval_steps_per_second": 3.604, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.2551444730854135e-05, + "loss": 0.6952, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.911643296210408, + "eval_loss": 0.8082801699638367, + "eval_precision": 0.9109842169566414, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.3633, + "eval_samples_per_second": 431.41, + "eval_steps_per_second": 3.468, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 9.846059895691212e-06, + "loss": 0.6956, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 7.440533912516941e-06, + "loss": 0.6934, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9212860310421286, + "eval_f1": 0.9167130126302919, + "eval_loss": 0.7990955710411072, + "eval_precision": 0.9149227601944712, + "eval_recall": 0.9212860310421286, + "eval_runtime": 8.059, + "eval_samples_per_second": 447.697, + "eval_steps_per_second": 3.598, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 5.350653714538273e-06, + "loss": 0.6926, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.590134721720188e-06, + "loss": 0.6898, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9167935491245766, + "eval_loss": 0.8045080304145813, + "eval_precision": 0.9146813451558005, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.1548, + "eval_samples_per_second": 442.44, + "eval_steps_per_second": 3.556, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.1705308295282073e-06, + "loss": 0.6949, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9103815860644321, + "eval_loss": 0.8065946102142334, + "eval_precision": 0.9088516755127686, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.4508, + "eval_samples_per_second": 426.94, + "eval_steps_per_second": 3.432, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.1011585832763713e-06, + "loss": 0.6955, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 3.890360357078231e-07, + "loss": 0.6942, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9162347548493954, + "eval_loss": 0.8001212477684021, + "eval_precision": 0.9132188082899726, + "eval_recall": 0.9207317073170732, + "eval_runtime": 8.4361, + "eval_samples_per_second": 427.686, + "eval_steps_per_second": 3.438, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 3.883668907207736e-08, + "loss": 0.6959, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9139588893579951, + "eval_loss": 0.8064656257629395, + "eval_precision": 0.9127129204695852, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.7228, + "eval_samples_per_second": 467.188, + "eval_steps_per_second": 3.755, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.04365369600931074, + "learning_rate": 0.00011107380238780992, + "metric": "eval/loss", + "weight_decay": 0.012659976334904684 + } +} diff --git a/run-b3iy7cum/checkpoint-630/training_args.bin b/run-b3iy7cum/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9ecb904312f636f3e7259ae62f570ff25b0fab53 --- /dev/null +++ b/run-b3iy7cum/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb3721fdb654f9a5b927b31a22fe1255482d2eb9e12e01d3c683592f8a29b80 +size 4792 diff --git a/run-bbng86vt/checkpoint-510/model.safetensors b/run-bbng86vt/checkpoint-510/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df26a77200687c06b3f0fcfc26794957536f0523 --- /dev/null +++ b/run-bbng86vt/checkpoint-510/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87df2f88cded3101377d97303d428dfaac1a7cb78298074cb084c2995eb48abe +size 198025308 diff --git a/run-bbng86vt/checkpoint-510/optimizer.pt b/run-bbng86vt/checkpoint-510/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c237ddffbe81b8ef97ca4ff4c31498319495718c --- /dev/null +++ b/run-bbng86vt/checkpoint-510/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:482ade16afabbdb8183e5c35d4f9256fbc0fe05abd783b520d2212917b177c69 +size 395900602 diff --git a/run-bbng86vt/checkpoint-510/rng_state.pth b/run-bbng86vt/checkpoint-510/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1d8dc24231ef2178e0e7f1fb8387b9f8514188b8 --- /dev/null +++ b/run-bbng86vt/checkpoint-510/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff11e63c65ca788e4e4341e1970557dcb3db9d0cb5075b86ffbecfbd1dc05a1a +size 14244 diff --git a/run-bbng86vt/checkpoint-510/scheduler.pt b/run-bbng86vt/checkpoint-510/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f86d17222dffc44fc98b18741fbac32f1b7c7aea --- /dev/null +++ b/run-bbng86vt/checkpoint-510/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b74981844f89414d2de01e95db8791429670d9478938fda2207a3404a0f98d43 +size 1064 diff --git a/run-bbng86vt/checkpoint-510/trainer_state.json b/run-bbng86vt/checkpoint-510/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f7813f7d767a6b57e96f3cf2f307529c90682530 --- /dev/null +++ b/run-bbng86vt/checkpoint-510/trainer_state.json @@ -0,0 +1,550 @@ +{ + "best_metric": 0.919006940903375, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-bbng86vt/checkpoint-510", + "epoch": 24.0, + "eval_steps": 500, + "global_step": 510, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.3342634804857996e-05, + "loss": 1.4434, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9800465106964111, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.082, + "eval_samples_per_second": 446.423, + "eval_steps_per_second": 3.588, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 4.668526960971599e-05, + "loss": 1.0596, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 7.002790441457399e-05, + "loss": 0.9053, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8791574279379157, + "eval_f1": 0.8649655885052044, + "eval_loss": 0.9112829566001892, + "eval_precision": 0.8816125625645339, + "eval_recall": 0.8791574279379157, + "eval_runtime": 7.7392, + "eval_samples_per_second": 466.201, + "eval_steps_per_second": 3.747, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 9.337053921943199e-05, + "loss": 0.8371, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9057649667405765, + "eval_f1": 0.8973302936118829, + "eval_loss": 0.8175995945930481, + "eval_precision": 0.893116770640266, + "eval_recall": 0.9057649667405765, + "eval_runtime": 8.2058, + "eval_samples_per_second": 439.686, + "eval_steps_per_second": 3.534, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.00011671317402429, + "loss": 0.8128, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00014005580882914799, + "loss": 0.7901, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7840909090909091, + "eval_f1": 0.8156018386888032, + "eval_loss": 0.9902897477149963, + "eval_precision": 0.8849572470181729, + "eval_recall": 0.7840909090909091, + "eval_runtime": 8.268, + "eval_samples_per_second": 436.38, + "eval_steps_per_second": 3.507, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00016339844363400598, + "loss": 0.7899, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00018674107843886397, + "loss": 0.7775, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.8992932908706694, + "eval_loss": 0.8180431127548218, + "eval_precision": 0.8973880876621988, + "eval_recall": 0.9093680709534369, + "eval_runtime": 8.5345, + "eval_samples_per_second": 422.753, + "eval_steps_per_second": 3.398, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.000210083713243722, + "loss": 0.7579, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9041019955654102, + "eval_f1": 0.9047616937962585, + "eval_loss": 0.82468581199646, + "eval_precision": 0.9085287756658682, + "eval_recall": 0.9041019955654102, + "eval_runtime": 8.2856, + "eval_samples_per_second": 435.455, + "eval_steps_per_second": 3.5, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00022620883852432518, + "loss": 0.7665, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0002256094761721382, + "loss": 0.7545, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.8972552044346648, + "eval_loss": 0.8219521641731262, + "eval_precision": 0.8964586313843746, + "eval_recall": 0.907150776053215, + "eval_runtime": 8.2646, + "eval_samples_per_second": 436.561, + "eval_steps_per_second": 3.509, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00022427188347273196, + "loss": 0.7498, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0002222048387501617, + "loss": 0.7445, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.438470066518847, + "eval_f1": 0.5006055986905423, + "eval_loss": 1.628142237663269, + "eval_precision": 0.8626541826663777, + "eval_recall": 0.438470066518847, + "eval_runtime": 8.1375, + "eval_samples_per_second": 443.378, + "eval_steps_per_second": 3.564, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0002194219075602348, + "loss": 0.7427, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8946784922394678, + "eval_f1": 0.8767800272198636, + "eval_loss": 0.8437649011611938, + "eval_precision": 0.882780221984318, + "eval_recall": 0.8946784922394678, + "eval_runtime": 7.9653, + "eval_samples_per_second": 452.963, + "eval_steps_per_second": 3.641, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00021594135366277857, + "loss": 0.7371, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00021178601916063726, + "loss": 0.7311, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8874722838137472, + "eval_f1": 0.8910605753397166, + "eval_loss": 0.8464184999465942, + "eval_precision": 0.8985629988450767, + "eval_recall": 0.8874722838137472, + "eval_runtime": 8.3673, + "eval_samples_per_second": 431.203, + "eval_steps_per_second": 3.466, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0002069831745920202, + "loss": 0.7236, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8907982261640798, + "eval_f1": 0.8930148495689161, + "eval_loss": 0.84920334815979, + "eval_precision": 0.8990654119704178, + "eval_recall": 0.8907982261640798, + "eval_runtime": 7.8087, + "eval_samples_per_second": 462.049, + "eval_steps_per_second": 3.714, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00020156433996001134, + "loss": 0.7254, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00019556507787378357, + "loss": 0.7174, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9089335928890313, + "eval_loss": 0.8189741373062134, + "eval_precision": 0.9103045990163888, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.2202, + "eval_samples_per_second": 438.918, + "eval_steps_per_second": 3.528, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0001890247601590867, + "loss": 0.7202, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00018198630946969225, + "loss": 0.7129, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9110087741483047, + "eval_loss": 0.8102943301200867, + "eval_precision": 0.9086880786410851, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.0877, + "eval_samples_per_second": 446.108, + "eval_steps_per_second": 3.586, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00017449591759554172, + "loss": 0.7135, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9080291130097499, + "eval_loss": 0.8151609301567078, + "eval_precision": 0.9053359796213186, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.3601, + "eval_samples_per_second": 431.576, + "eval_steps_per_second": 3.469, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00016660274231628005, + "loss": 0.7057, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00015835858478965641, + "loss": 0.7097, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.90208964287232, + "eval_loss": 0.8241292834281921, + "eval_precision": 0.9110204048586549, + "eval_recall": 0.9024390243902439, + "eval_runtime": 8.1061, + "eval_samples_per_second": 445.099, + "eval_steps_per_second": 3.578, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00014981754959202164, + "loss": 0.7098, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00014103568964199975, + "loss": 0.7107, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9128934820398997, + "eval_loss": 0.8097543716430664, + "eval_precision": 0.9144667718447255, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.2322, + "eval_samples_per_second": 438.279, + "eval_steps_per_second": 3.523, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00013207063833762243, + "loss": 0.7038, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9038248337028825, + "eval_f1": 0.9043746208541081, + "eval_loss": 0.8262745141983032, + "eval_precision": 0.9075899146905041, + "eval_recall": 0.9038248337028825, + "eval_runtime": 8.1658, + "eval_samples_per_second": 441.841, + "eval_steps_per_second": 3.551, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0001229812313211271, + "loss": 0.7036, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00011382712035369353, + "loss": 0.7016, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9075901446677368, + "eval_loss": 0.8123452067375183, + "eval_precision": 0.905938338644551, + "eval_recall": 0.9154656319290465, + "eval_runtime": 8.1773, + "eval_samples_per_second": 441.223, + "eval_steps_per_second": 3.546, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00010466838183417277, + "loss": 0.7018, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 9.556512253101327e-05, + "loss": 0.6976, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9057679485077457, + "eval_loss": 0.8173046112060547, + "eval_precision": 0.9032277015837212, + "eval_recall": 0.9115853658536586, + "eval_runtime": 8.306, + "eval_samples_per_second": 434.385, + "eval_steps_per_second": 3.491, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 8.657708511487825e-05, + "loss": 0.7023, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9094313020806192, + "eval_loss": 0.8093283176422119, + "eval_precision": 0.9070864235402264, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.456, + "eval_samples_per_second": 426.679, + "eval_steps_per_second": 3.43, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 7.776325608075555e-05, + "loss": 0.6951, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 6.91814786326814e-05, + "loss": 0.6921, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9065679928961826, + "eval_loss": 0.8198642730712891, + "eval_precision": 0.9071234623498421, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.377, + "eval_samples_per_second": 430.702, + "eval_steps_per_second": 3.462, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 6.088807307162832e-05, + "loss": 0.6938, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9097619999989606, + "eval_loss": 0.8165462613105774, + "eval_precision": 0.9093438414115602, + "eval_recall": 0.9157427937915743, + "eval_runtime": 8.2965, + "eval_samples_per_second": 434.881, + "eval_steps_per_second": 3.495, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 5.2937467177868044e-05, + "loss": 0.6925, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 4.538183901352815e-05, + "loss": 0.6894, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9106001676047935, + "eval_loss": 0.8112087249755859, + "eval_precision": 0.9095045105926114, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.5998, + "eval_samples_per_second": 419.545, + "eval_steps_per_second": 3.372, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 3.827077448954923e-05, + "loss": 0.692, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 3.165094194435509e-05, + "loss": 0.69, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9229490022172949, + "eval_f1": 0.919006940903375, + "eval_loss": 0.7976137399673462, + "eval_precision": 0.9170502214438332, + "eval_recall": 0.9229490022172949, + "eval_runtime": 8.2077, + "eval_samples_per_second": 439.589, + "eval_steps_per_second": 3.533, + "step": 510 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.12082080830493924, + "learning_rate": 0.00022624399887785445, + "metric": "eval/loss", + "weight_decay": 0.06261597404505846 + } +} diff --git a/run-bbng86vt/checkpoint-510/training_args.bin b/run-bbng86vt/checkpoint-510/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd411a575b0c10ad650c0c3fc5f48c4c2167ed87 --- /dev/null +++ b/run-bbng86vt/checkpoint-510/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:687b4c28cec21b86cdedf064ddec6dd2069b6a21f50cce311216543ce7bb1634 +size 4792 diff --git a/run-bbng86vt/checkpoint-630/model.safetensors b/run-bbng86vt/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4883c87dc854ad17ff5cb4d1e83cbf5d8bc9ca7f --- /dev/null +++ b/run-bbng86vt/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49c3106b3bce4dd84fb30e3d002495293238268d3bd6dba5b2c698c7b4f0ec0d +size 198025308 diff --git a/run-bbng86vt/checkpoint-630/optimizer.pt b/run-bbng86vt/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..981006b5ff357401e9c2735bd39f4422df6eae3d --- /dev/null +++ b/run-bbng86vt/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45932733d2a3295d90b3ee0c1072427e96797db5dd9d6fa108e8b8a0627db858 +size 395900602 diff --git a/run-bbng86vt/checkpoint-630/rng_state.pth b/run-bbng86vt/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-bbng86vt/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-bbng86vt/checkpoint-630/scheduler.pt b/run-bbng86vt/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fb6a190a958ec7cdeb3c4fe8e5d32315dc23ca7 --- /dev/null +++ b/run-bbng86vt/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5776d97ee4f49908408c9e68c0038ca30c1c7a6691cb0474efd6c0f70d1b2522 +size 1064 diff --git a/run-bbng86vt/checkpoint-630/trainer_state.json b/run-bbng86vt/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c3341d38c19556c5ba8f655656ef7cfab7d9270c --- /dev/null +++ b/run-bbng86vt/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.919006940903375, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-bbng86vt/checkpoint-510", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.3342634804857996e-05, + "loss": 1.4434, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9800465106964111, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.082, + "eval_samples_per_second": 446.423, + "eval_steps_per_second": 3.588, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 4.668526960971599e-05, + "loss": 1.0596, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 7.002790441457399e-05, + "loss": 0.9053, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8791574279379157, + "eval_f1": 0.8649655885052044, + "eval_loss": 0.9112829566001892, + "eval_precision": 0.8816125625645339, + "eval_recall": 0.8791574279379157, + "eval_runtime": 7.7392, + "eval_samples_per_second": 466.201, + "eval_steps_per_second": 3.747, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 9.337053921943199e-05, + "loss": 0.8371, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9057649667405765, + "eval_f1": 0.8973302936118829, + "eval_loss": 0.8175995945930481, + "eval_precision": 0.893116770640266, + "eval_recall": 0.9057649667405765, + "eval_runtime": 8.2058, + "eval_samples_per_second": 439.686, + "eval_steps_per_second": 3.534, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.00011671317402429, + "loss": 0.8128, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00014005580882914799, + "loss": 0.7901, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7840909090909091, + "eval_f1": 0.8156018386888032, + "eval_loss": 0.9902897477149963, + "eval_precision": 0.8849572470181729, + "eval_recall": 0.7840909090909091, + "eval_runtime": 8.268, + "eval_samples_per_second": 436.38, + "eval_steps_per_second": 3.507, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00016339844363400598, + "loss": 0.7899, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00018674107843886397, + "loss": 0.7775, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.8992932908706694, + "eval_loss": 0.8180431127548218, + "eval_precision": 0.8973880876621988, + "eval_recall": 0.9093680709534369, + "eval_runtime": 8.5345, + "eval_samples_per_second": 422.753, + "eval_steps_per_second": 3.398, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.000210083713243722, + "loss": 0.7579, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9041019955654102, + "eval_f1": 0.9047616937962585, + "eval_loss": 0.82468581199646, + "eval_precision": 0.9085287756658682, + "eval_recall": 0.9041019955654102, + "eval_runtime": 8.2856, + "eval_samples_per_second": 435.455, + "eval_steps_per_second": 3.5, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00022620883852432518, + "loss": 0.7665, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0002256094761721382, + "loss": 0.7545, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.8972552044346648, + "eval_loss": 0.8219521641731262, + "eval_precision": 0.8964586313843746, + "eval_recall": 0.907150776053215, + "eval_runtime": 8.2646, + "eval_samples_per_second": 436.561, + "eval_steps_per_second": 3.509, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00022427188347273196, + "loss": 0.7498, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0002222048387501617, + "loss": 0.7445, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.438470066518847, + "eval_f1": 0.5006055986905423, + "eval_loss": 1.628142237663269, + "eval_precision": 0.8626541826663777, + "eval_recall": 0.438470066518847, + "eval_runtime": 8.1375, + "eval_samples_per_second": 443.378, + "eval_steps_per_second": 3.564, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0002194219075602348, + "loss": 0.7427, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8946784922394678, + "eval_f1": 0.8767800272198636, + "eval_loss": 0.8437649011611938, + "eval_precision": 0.882780221984318, + "eval_recall": 0.8946784922394678, + "eval_runtime": 7.9653, + "eval_samples_per_second": 452.963, + "eval_steps_per_second": 3.641, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00021594135366277857, + "loss": 0.7371, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00021178601916063726, + "loss": 0.7311, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8874722838137472, + "eval_f1": 0.8910605753397166, + "eval_loss": 0.8464184999465942, + "eval_precision": 0.8985629988450767, + "eval_recall": 0.8874722838137472, + "eval_runtime": 8.3673, + "eval_samples_per_second": 431.203, + "eval_steps_per_second": 3.466, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0002069831745920202, + "loss": 0.7236, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8907982261640798, + "eval_f1": 0.8930148495689161, + "eval_loss": 0.84920334815979, + "eval_precision": 0.8990654119704178, + "eval_recall": 0.8907982261640798, + "eval_runtime": 7.8087, + "eval_samples_per_second": 462.049, + "eval_steps_per_second": 3.714, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00020156433996001134, + "loss": 0.7254, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00019556507787378357, + "loss": 0.7174, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9089335928890313, + "eval_loss": 0.8189741373062134, + "eval_precision": 0.9103045990163888, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.2202, + "eval_samples_per_second": 438.918, + "eval_steps_per_second": 3.528, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0001890247601590867, + "loss": 0.7202, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00018198630946969225, + "loss": 0.7129, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9110087741483047, + "eval_loss": 0.8102943301200867, + "eval_precision": 0.9086880786410851, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.0877, + "eval_samples_per_second": 446.108, + "eval_steps_per_second": 3.586, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00017449591759554172, + "loss": 0.7135, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9080291130097499, + "eval_loss": 0.8151609301567078, + "eval_precision": 0.9053359796213186, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.3601, + "eval_samples_per_second": 431.576, + "eval_steps_per_second": 3.469, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00016660274231628005, + "loss": 0.7057, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00015835858478965641, + "loss": 0.7097, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.90208964287232, + "eval_loss": 0.8241292834281921, + "eval_precision": 0.9110204048586549, + "eval_recall": 0.9024390243902439, + "eval_runtime": 8.1061, + "eval_samples_per_second": 445.099, + "eval_steps_per_second": 3.578, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00014981754959202164, + "loss": 0.7098, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00014103568964199975, + "loss": 0.7107, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9128934820398997, + "eval_loss": 0.8097543716430664, + "eval_precision": 0.9144667718447255, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.2322, + "eval_samples_per_second": 438.279, + "eval_steps_per_second": 3.523, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00013207063833762243, + "loss": 0.7038, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9038248337028825, + "eval_f1": 0.9043746208541081, + "eval_loss": 0.8262745141983032, + "eval_precision": 0.9075899146905041, + "eval_recall": 0.9038248337028825, + "eval_runtime": 8.1658, + "eval_samples_per_second": 441.841, + "eval_steps_per_second": 3.551, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0001229812313211271, + "loss": 0.7036, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00011382712035369353, + "loss": 0.7016, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9075901446677368, + "eval_loss": 0.8123452067375183, + "eval_precision": 0.905938338644551, + "eval_recall": 0.9154656319290465, + "eval_runtime": 8.1773, + "eval_samples_per_second": 441.223, + "eval_steps_per_second": 3.546, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00010466838183417277, + "loss": 0.7018, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 9.556512253101327e-05, + "loss": 0.6976, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9057679485077457, + "eval_loss": 0.8173046112060547, + "eval_precision": 0.9032277015837212, + "eval_recall": 0.9115853658536586, + "eval_runtime": 8.306, + "eval_samples_per_second": 434.385, + "eval_steps_per_second": 3.491, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 8.657708511487825e-05, + "loss": 0.7023, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9094313020806192, + "eval_loss": 0.8093283176422119, + "eval_precision": 0.9070864235402264, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.456, + "eval_samples_per_second": 426.679, + "eval_steps_per_second": 3.43, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 7.776325608075555e-05, + "loss": 0.6951, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 6.91814786326814e-05, + "loss": 0.6921, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9065679928961826, + "eval_loss": 0.8198642730712891, + "eval_precision": 0.9071234623498421, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.377, + "eval_samples_per_second": 430.702, + "eval_steps_per_second": 3.462, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 6.088807307162832e-05, + "loss": 0.6938, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9097619999989606, + "eval_loss": 0.8165462613105774, + "eval_precision": 0.9093438414115602, + "eval_recall": 0.9157427937915743, + "eval_runtime": 8.2965, + "eval_samples_per_second": 434.881, + "eval_steps_per_second": 3.495, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 5.2937467177868044e-05, + "loss": 0.6925, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 4.538183901352815e-05, + "loss": 0.6894, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9106001676047935, + "eval_loss": 0.8112087249755859, + "eval_precision": 0.9095045105926114, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.5998, + "eval_samples_per_second": 419.545, + "eval_steps_per_second": 3.372, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 3.827077448954923e-05, + "loss": 0.692, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 3.165094194435509e-05, + "loss": 0.69, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9229490022172949, + "eval_f1": 0.919006940903375, + "eval_loss": 0.7976137399673462, + "eval_precision": 0.9170502214438332, + "eval_recall": 0.9229490022172949, + "eval_runtime": 8.2077, + "eval_samples_per_second": 439.589, + "eval_steps_per_second": 3.533, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 2.5565785869904315e-05, + "loss": 0.6918, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9119706719176081, + "eval_loss": 0.8117545247077942, + "eval_precision": 0.9113747051947674, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.1035, + "eval_samples_per_second": 445.239, + "eval_steps_per_second": 3.579, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 2.005524179513031e-05, + "loss": 0.6891, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.5155474197927225e-05, + "loss": 0.6869, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9226718403547672, + "eval_f1": 0.9184379741181177, + "eval_loss": 0.8007141351699829, + "eval_precision": 0.9178856423835274, + "eval_recall": 0.9226718403547672, + "eval_runtime": 7.862, + "eval_samples_per_second": 458.916, + "eval_steps_per_second": 3.689, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.0898639165707e-05, + "loss": 0.686, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 7.3126733621334145e-06, + "loss": 0.6856, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9156604329454298, + "eval_loss": 0.8065941333770752, + "eval_precision": 0.9148760178273898, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.3244, + "eval_samples_per_second": 433.423, + "eval_steps_per_second": 3.484, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 4.421110684998229e-06, + "loss": 0.686, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9107884434652165, + "eval_loss": 0.8128623962402344, + "eval_precision": 0.909232373770158, + "eval_recall": 0.9157427937915743, + "eval_runtime": 8.0918, + "eval_samples_per_second": 445.882, + "eval_steps_per_second": 3.584, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 2.242927818472348e-06, + "loss": 0.6869, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 7.924196933388258e-07, + "loss": 0.6869, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9155791663645007, + "eval_loss": 0.8030709028244019, + "eval_precision": 0.9138430980200704, + "eval_recall": 0.91990022172949, + "eval_runtime": 8.3862, + "eval_samples_per_second": 430.229, + "eval_steps_per_second": 3.458, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 7.910567253441713e-08, + "loss": 0.6894, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9141860175837083, + "eval_loss": 0.8105825781822205, + "eval_precision": 0.9134542776376958, + "eval_recall": 0.9196230598669624, + "eval_runtime": 7.7684, + "eval_samples_per_second": 464.448, + "eval_steps_per_second": 3.733, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.12082080830493924, + "learning_rate": 0.00022624399887785445, + "metric": "eval/loss", + "weight_decay": 0.06261597404505846 + } +} diff --git a/run-bbng86vt/checkpoint-630/training_args.bin b/run-bbng86vt/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd411a575b0c10ad650c0c3fc5f48c4c2167ed87 --- /dev/null +++ b/run-bbng86vt/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:687b4c28cec21b86cdedf064ddec6dd2069b6a21f50cce311216543ce7bb1634 +size 4792 diff --git a/run-bby12rkd/checkpoint-616/model.safetensors b/run-bby12rkd/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..310ee27310c7bc9022d2f768e653deac3350754e --- /dev/null +++ b/run-bby12rkd/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:604969e7f776b6876fd906a28c2258214a1d7bdeee158f768fc4a7b1c76827fd +size 198025308 diff --git a/run-bby12rkd/checkpoint-616/optimizer.pt b/run-bby12rkd/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5ce60c89e8d0c0dac0c9fdfb3ccbb1e34cea606 --- /dev/null +++ b/run-bby12rkd/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bec6cfa1cba72806b73c25337312ee10dacd9c88520c60f8c087f6d8d8108093 +size 395900602 diff --git a/run-bby12rkd/checkpoint-616/rng_state.pth b/run-bby12rkd/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-bby12rkd/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-bby12rkd/checkpoint-616/scheduler.pt b/run-bby12rkd/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..166048c181a3c0d3b900c7d1c51ee4ec9e40babc --- /dev/null +++ b/run-bby12rkd/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b54d3eb3ef2103249d36a970d3399b0842cda56cc18c9302e0681bd9a3d2b8a1 +size 1064 diff --git a/run-bby12rkd/checkpoint-616/trainer_state.json b/run-bby12rkd/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7d55d28e212041f96e3c0d0c85842e37b9968d51 --- /dev/null +++ b/run-bby12rkd/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9210302061521183, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-bby12rkd/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.9478177833699686e-05, + "loss": 1.476, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9933125972747803, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.6425, + "eval_samples_per_second": 472.098, + "eval_steps_per_second": 3.795, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.895635566739937e-05, + "loss": 1.0879, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 5.8434533501099055e-05, + "loss": 0.931, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8805432372505543, + "eval_f1": 0.8593594247999768, + "eval_loss": 0.9153474569320679, + "eval_precision": 0.8781736130999094, + "eval_recall": 0.8805432372505543, + "eval_runtime": 8.0752, + "eval_samples_per_second": 446.798, + "eval_steps_per_second": 3.591, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 7.791271133479874e-05, + "loss": 0.863, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9061384915500872, + "eval_loss": 0.8912007808685303, + "eval_precision": 0.9024078264746547, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.7556, + "eval_samples_per_second": 465.21, + "eval_steps_per_second": 3.739, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 9.739088916849843e-05, + "loss": 0.8239, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00011686906700219811, + "loss": 0.799, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9030884661800006, + "eval_loss": 0.8156266808509827, + "eval_precision": 0.9003789835266865, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.2058, + "eval_samples_per_second": 439.69, + "eval_steps_per_second": 3.534, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0001363472448358978, + "loss": 0.8013, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00014224116274559866, + "loss": 0.7762, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.905073723295871, + "eval_loss": 0.8157945871353149, + "eval_precision": 0.9075096059663029, + "eval_recall": 0.9068736141906873, + "eval_runtime": 8.1055, + "eval_samples_per_second": 445.133, + "eval_steps_per_second": 3.578, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00014174746566815026, + "loss": 0.7757, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8968957871396895, + "eval_f1": 0.8992436805894887, + "eval_loss": 0.8352044224739075, + "eval_precision": 0.9060764938030305, + "eval_recall": 0.8968957871396895, + "eval_runtime": 8.263, + "eval_samples_per_second": 436.646, + "eval_steps_per_second": 3.51, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00014084268286118905, + "loss": 0.7668, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00013953208434531068, + "loss": 0.7621, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9087182275326549, + "eval_loss": 0.8042978644371033, + "eval_precision": 0.905902063489189, + "eval_recall": 0.9171286031042128, + "eval_runtime": 8.2223, + "eval_samples_per_second": 438.808, + "eval_steps_per_second": 3.527, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00013782330386545165, + "loss": 0.7604, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00013572629442718738, + "loss": 0.7468, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9079445057888318, + "eval_loss": 0.8031318187713623, + "eval_precision": 0.906246100705943, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.4598, + "eval_samples_per_second": 483.661, + "eval_steps_per_second": 3.888, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00013325327032420668, + "loss": 0.7406, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9091717290526049, + "eval_loss": 0.8017144799232483, + "eval_precision": 0.9089365035738217, + "eval_recall": 0.9129711751662971, + "eval_runtime": 7.7183, + "eval_samples_per_second": 467.459, + "eval_steps_per_second": 3.757, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.000130418635994631, + "loss": 0.7424, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00012723890212056315, + "loss": 0.7351, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.909637696849185, + "eval_loss": 0.8041776418685913, + "eval_precision": 0.907971736250602, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.8376, + "eval_samples_per_second": 460.344, + "eval_steps_per_second": 3.7, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00012373258945955516, + "loss": 0.7285, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8977272727272727, + "eval_f1": 0.8995224089930041, + "eval_loss": 0.8275798559188843, + "eval_precision": 0.9037905718861858, + "eval_recall": 0.8977272727272727, + "eval_runtime": 7.9177, + "eval_samples_per_second": 455.687, + "eval_steps_per_second": 3.663, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00011992012096813975, + "loss": 0.7373, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00011582370284576647, + "loss": 0.7231, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.9109734472081614, + "eval_loss": 0.8144450783729553, + "eval_precision": 0.9118659094815363, + "eval_recall": 0.9107538802660754, + "eval_runtime": 7.9411, + "eval_samples_per_second": 454.347, + "eval_steps_per_second": 3.652, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0001114671951920162, + "loss": 0.7184, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0001068759730304676, + "loss": 0.7174, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.909135405812888, + "eval_loss": 0.8156898021697998, + "eval_precision": 0.909278592037413, + "eval_recall": 0.9129711751662971, + "eval_runtime": 7.6586, + "eval_samples_per_second": 471.102, + "eval_steps_per_second": 3.787, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00010207677850869928, + "loss": 0.7142, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.9007945355514471, + "eval_loss": 0.8279581069946289, + "eval_precision": 0.9038038003958175, + "eval_recall": 0.9113082039911308, + "eval_runtime": 8.215, + "eval_samples_per_second": 439.197, + "eval_steps_per_second": 3.53, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 9.709756513530751e-05, + "loss": 0.7159, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 9.196733496119976e-05, + "loss": 0.7126, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9091109781814377, + "eval_loss": 0.8070549964904785, + "eval_precision": 0.9071926347508509, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.7635, + "eval_samples_per_second": 464.739, + "eval_steps_per_second": 3.735, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 8.671596965352283e-05, + "loss": 0.709, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 8.137405644615612e-05, + "loss": 0.7136, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9086646217115045, + "eval_loss": 0.8041656613349915, + "eval_precision": 0.9059512860490292, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.7847, + "eval_samples_per_second": 463.47, + "eval_steps_per_second": 3.725, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 7.597270998054306e-05, + "loss": 0.7042, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9128043301325549, + "eval_loss": 0.802530825138092, + "eval_precision": 0.909940273485026, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.0282, + "eval_samples_per_second": 449.416, + "eval_steps_per_second": 3.612, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 7.054339107457148e-05, + "loss": 0.7056, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 6.511772347510579e-05, + "loss": 0.705, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9129191777560949, + "eval_loss": 0.803310751914978, + "eval_precision": 0.9120994550110403, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.7469, + "eval_samples_per_second": 465.738, + "eval_steps_per_second": 3.743, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 5.972730966151907e-05, + "loss": 0.7025, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 5.4403546773100375e-05, + "loss": 0.7021, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.910654399792804, + "eval_loss": 0.8065840601921082, + "eval_precision": 0.9069881516005327, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.6522, + "eval_samples_per_second": 471.495, + "eval_steps_per_second": 3.79, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 4.917744373249089e-05, + "loss": 0.6996, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8980044345898004, + "eval_f1": 0.8997929728892949, + "eval_loss": 0.829388439655304, + "eval_precision": 0.9030476989737719, + "eval_recall": 0.8980044345898004, + "eval_runtime": 7.7949, + "eval_samples_per_second": 462.869, + "eval_steps_per_second": 3.72, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.407944063033655e-05, + "loss": 0.7012, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.913923142317388e-05, + "loss": 0.7001, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9136259289469392, + "eval_loss": 0.8053127527236938, + "eval_precision": 0.9139408798257811, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.9179, + "eval_samples_per_second": 455.676, + "eval_steps_per_second": 3.663, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.438559097726694e-05, + "loss": 0.6969, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9096417918638233, + "eval_loss": 0.809673011302948, + "eval_precision": 0.9068594859972906, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.0512, + "eval_samples_per_second": 448.132, + "eval_steps_per_second": 3.602, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.984620746580088e-05, + "loss": 0.6993, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.5547521095654953e-05, + "loss": 0.696, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9221175166297118, + "eval_f1": 0.9176529036407934, + "eval_loss": 0.8008747100830078, + "eval_precision": 0.9168418370864793, + "eval_recall": 0.9221175166297118, + "eval_runtime": 7.6325, + "eval_samples_per_second": 472.715, + "eval_steps_per_second": 3.8, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.151457010311128e-05, + "loss": 0.695, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.7770844915516967e-05, + "loss": 0.694, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9152280318347947, + "eval_loss": 0.806031346321106, + "eval_precision": 0.9134102682528302, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.9838, + "eval_samples_per_second": 451.917, + "eval_steps_per_second": 3.632, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.4338151328352614e-05, + "loss": 0.6966, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9163489585109316, + "eval_loss": 0.8020098805427551, + "eval_precision": 0.9136792717065519, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.4971, + "eval_samples_per_second": 481.253, + "eval_steps_per_second": 3.868, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.1236483494649816e-05, + "loss": 0.6903, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 8.483907466546861e-06, + "loss": 0.6927, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9141445612391859, + "eval_loss": 0.7987990975379944, + "eval_precision": 0.9113136140366306, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.8726, + "eval_samples_per_second": 458.299, + "eval_steps_per_second": 3.684, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 6.096455967308938e-06, + "loss": 0.6934, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 4.088035006726664e-06, + "loss": 0.6917, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9129174846245673, + "eval_loss": 0.8045623302459717, + "eval_precision": 0.910341298839254, + "eval_recall": 0.9157427937915743, + "eval_runtime": 8.1537, + "eval_samples_per_second": 442.5, + "eval_steps_per_second": 3.557, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.470342883823302e-06, + "loss": 0.6921, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9142830868671395, + "eval_loss": 0.8011919856071472, + "eval_precision": 0.9112303017075651, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.8833, + "eval_samples_per_second": 457.676, + "eval_steps_per_second": 3.679, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.2528020486492668e-06, + "loss": 0.6925, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 4.4250422004359235e-07, + "loss": 0.6943, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9246119733924612, + "eval_f1": 0.9210302061521183, + "eval_loss": 0.7939128279685974, + "eval_precision": 0.919001696254973, + "eval_recall": 0.9246119733924612, + "eval_runtime": 7.9937, + "eval_samples_per_second": 451.358, + "eval_steps_per_second": 3.628, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.49975930387655326, + "learning_rate": 0.00014234053032319, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-bby12rkd/checkpoint-616/training_args.bin b/run-bby12rkd/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8700cb056558c8137c9c3cdfa80db5e0d7edfe49 --- /dev/null +++ b/run-bby12rkd/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35f6efd6d394394c556a96b1b6be2a98bff9787600cafbe2fd849f30239f4585 +size 4792 diff --git a/run-bby12rkd/checkpoint-630/model.safetensors b/run-bby12rkd/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c29c0f3aa8037ac09513a67bd1e3073099ade9a9 --- /dev/null +++ b/run-bby12rkd/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b09a171f0447ae811315fe2c43af45f29c10687fd724dfaafa8dbb1715d4583 +size 198025308 diff --git a/run-bby12rkd/checkpoint-630/optimizer.pt b/run-bby12rkd/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5e541f072f65ae9f7f7fcb35a3a25b50558150e --- /dev/null +++ b/run-bby12rkd/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4761bbb8b9df51d807dc322c60c3b3f51c4063b50117acd27f44f420188ee6b4 +size 395900602 diff --git a/run-bby12rkd/checkpoint-630/rng_state.pth b/run-bby12rkd/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-bby12rkd/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-bby12rkd/checkpoint-630/scheduler.pt b/run-bby12rkd/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..521b4e936b59ccaecac5429ed780735936d97c83 --- /dev/null +++ b/run-bby12rkd/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:291ce0738c571b937efe88674c9268a55974060aae8a2291271f15bf324a98dc +size 1064 diff --git a/run-bby12rkd/checkpoint-630/trainer_state.json b/run-bby12rkd/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..02301e2a763f2cfed1af1129075d1ede9a0f57c6 --- /dev/null +++ b/run-bby12rkd/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9210302061521183, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-bby12rkd/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.9478177833699686e-05, + "loss": 1.476, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9933125972747803, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.6425, + "eval_samples_per_second": 472.098, + "eval_steps_per_second": 3.795, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.895635566739937e-05, + "loss": 1.0879, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 5.8434533501099055e-05, + "loss": 0.931, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8805432372505543, + "eval_f1": 0.8593594247999768, + "eval_loss": 0.9153474569320679, + "eval_precision": 0.8781736130999094, + "eval_recall": 0.8805432372505543, + "eval_runtime": 8.0752, + "eval_samples_per_second": 446.798, + "eval_steps_per_second": 3.591, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 7.791271133479874e-05, + "loss": 0.863, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9061384915500872, + "eval_loss": 0.8912007808685303, + "eval_precision": 0.9024078264746547, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.7556, + "eval_samples_per_second": 465.21, + "eval_steps_per_second": 3.739, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 9.739088916849843e-05, + "loss": 0.8239, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00011686906700219811, + "loss": 0.799, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9030884661800006, + "eval_loss": 0.8156266808509827, + "eval_precision": 0.9003789835266865, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.2058, + "eval_samples_per_second": 439.69, + "eval_steps_per_second": 3.534, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0001363472448358978, + "loss": 0.8013, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00014224116274559866, + "loss": 0.7762, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.905073723295871, + "eval_loss": 0.8157945871353149, + "eval_precision": 0.9075096059663029, + "eval_recall": 0.9068736141906873, + "eval_runtime": 8.1055, + "eval_samples_per_second": 445.133, + "eval_steps_per_second": 3.578, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00014174746566815026, + "loss": 0.7757, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8968957871396895, + "eval_f1": 0.8992436805894887, + "eval_loss": 0.8352044224739075, + "eval_precision": 0.9060764938030305, + "eval_recall": 0.8968957871396895, + "eval_runtime": 8.263, + "eval_samples_per_second": 436.646, + "eval_steps_per_second": 3.51, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00014084268286118905, + "loss": 0.7668, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00013953208434531068, + "loss": 0.7621, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9087182275326549, + "eval_loss": 0.8042978644371033, + "eval_precision": 0.905902063489189, + "eval_recall": 0.9171286031042128, + "eval_runtime": 8.2223, + "eval_samples_per_second": 438.808, + "eval_steps_per_second": 3.527, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00013782330386545165, + "loss": 0.7604, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00013572629442718738, + "loss": 0.7468, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9079445057888318, + "eval_loss": 0.8031318187713623, + "eval_precision": 0.906246100705943, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.4598, + "eval_samples_per_second": 483.661, + "eval_steps_per_second": 3.888, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00013325327032420668, + "loss": 0.7406, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9091717290526049, + "eval_loss": 0.8017144799232483, + "eval_precision": 0.9089365035738217, + "eval_recall": 0.9129711751662971, + "eval_runtime": 7.7183, + "eval_samples_per_second": 467.459, + "eval_steps_per_second": 3.757, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.000130418635994631, + "loss": 0.7424, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00012723890212056315, + "loss": 0.7351, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.909637696849185, + "eval_loss": 0.8041776418685913, + "eval_precision": 0.907971736250602, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.8376, + "eval_samples_per_second": 460.344, + "eval_steps_per_second": 3.7, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00012373258945955516, + "loss": 0.7285, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8977272727272727, + "eval_f1": 0.8995224089930041, + "eval_loss": 0.8275798559188843, + "eval_precision": 0.9037905718861858, + "eval_recall": 0.8977272727272727, + "eval_runtime": 7.9177, + "eval_samples_per_second": 455.687, + "eval_steps_per_second": 3.663, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00011992012096813975, + "loss": 0.7373, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00011582370284576647, + "loss": 0.7231, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.9109734472081614, + "eval_loss": 0.8144450783729553, + "eval_precision": 0.9118659094815363, + "eval_recall": 0.9107538802660754, + "eval_runtime": 7.9411, + "eval_samples_per_second": 454.347, + "eval_steps_per_second": 3.652, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0001114671951920162, + "loss": 0.7184, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0001068759730304676, + "loss": 0.7174, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.909135405812888, + "eval_loss": 0.8156898021697998, + "eval_precision": 0.909278592037413, + "eval_recall": 0.9129711751662971, + "eval_runtime": 7.6586, + "eval_samples_per_second": 471.102, + "eval_steps_per_second": 3.787, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00010207677850869928, + "loss": 0.7142, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.9007945355514471, + "eval_loss": 0.8279581069946289, + "eval_precision": 0.9038038003958175, + "eval_recall": 0.9113082039911308, + "eval_runtime": 8.215, + "eval_samples_per_second": 439.197, + "eval_steps_per_second": 3.53, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 9.709756513530751e-05, + "loss": 0.7159, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 9.196733496119976e-05, + "loss": 0.7126, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9091109781814377, + "eval_loss": 0.8070549964904785, + "eval_precision": 0.9071926347508509, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.7635, + "eval_samples_per_second": 464.739, + "eval_steps_per_second": 3.735, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 8.671596965352283e-05, + "loss": 0.709, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 8.137405644615612e-05, + "loss": 0.7136, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9086646217115045, + "eval_loss": 0.8041656613349915, + "eval_precision": 0.9059512860490292, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.7847, + "eval_samples_per_second": 463.47, + "eval_steps_per_second": 3.725, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 7.597270998054306e-05, + "loss": 0.7042, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9128043301325549, + "eval_loss": 0.802530825138092, + "eval_precision": 0.909940273485026, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.0282, + "eval_samples_per_second": 449.416, + "eval_steps_per_second": 3.612, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 7.054339107457148e-05, + "loss": 0.7056, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 6.511772347510579e-05, + "loss": 0.705, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9129191777560949, + "eval_loss": 0.803310751914978, + "eval_precision": 0.9120994550110403, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.7469, + "eval_samples_per_second": 465.738, + "eval_steps_per_second": 3.743, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 5.972730966151907e-05, + "loss": 0.7025, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 5.4403546773100375e-05, + "loss": 0.7021, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.910654399792804, + "eval_loss": 0.8065840601921082, + "eval_precision": 0.9069881516005327, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.6522, + "eval_samples_per_second": 471.495, + "eval_steps_per_second": 3.79, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 4.917744373249089e-05, + "loss": 0.6996, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8980044345898004, + "eval_f1": 0.8997929728892949, + "eval_loss": 0.829388439655304, + "eval_precision": 0.9030476989737719, + "eval_recall": 0.8980044345898004, + "eval_runtime": 7.7949, + "eval_samples_per_second": 462.869, + "eval_steps_per_second": 3.72, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.407944063033655e-05, + "loss": 0.7012, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.913923142317388e-05, + "loss": 0.7001, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9136259289469392, + "eval_loss": 0.8053127527236938, + "eval_precision": 0.9139408798257811, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.9179, + "eval_samples_per_second": 455.676, + "eval_steps_per_second": 3.663, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.438559097726694e-05, + "loss": 0.6969, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9096417918638233, + "eval_loss": 0.809673011302948, + "eval_precision": 0.9068594859972906, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.0512, + "eval_samples_per_second": 448.132, + "eval_steps_per_second": 3.602, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.984620746580088e-05, + "loss": 0.6993, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.5547521095654953e-05, + "loss": 0.696, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9221175166297118, + "eval_f1": 0.9176529036407934, + "eval_loss": 0.8008747100830078, + "eval_precision": 0.9168418370864793, + "eval_recall": 0.9221175166297118, + "eval_runtime": 7.6325, + "eval_samples_per_second": 472.715, + "eval_steps_per_second": 3.8, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.151457010311128e-05, + "loss": 0.695, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.7770844915516967e-05, + "loss": 0.694, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9152280318347947, + "eval_loss": 0.806031346321106, + "eval_precision": 0.9134102682528302, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.9838, + "eval_samples_per_second": 451.917, + "eval_steps_per_second": 3.632, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.4338151328352614e-05, + "loss": 0.6966, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9163489585109316, + "eval_loss": 0.8020098805427551, + "eval_precision": 0.9136792717065519, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.4971, + "eval_samples_per_second": 481.253, + "eval_steps_per_second": 3.868, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.1236483494649816e-05, + "loss": 0.6903, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 8.483907466546861e-06, + "loss": 0.6927, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9141445612391859, + "eval_loss": 0.7987990975379944, + "eval_precision": 0.9113136140366306, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.8726, + "eval_samples_per_second": 458.299, + "eval_steps_per_second": 3.684, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 6.096455967308938e-06, + "loss": 0.6934, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 4.088035006726664e-06, + "loss": 0.6917, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9129174846245673, + "eval_loss": 0.8045623302459717, + "eval_precision": 0.910341298839254, + "eval_recall": 0.9157427937915743, + "eval_runtime": 8.1537, + "eval_samples_per_second": 442.5, + "eval_steps_per_second": 3.557, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.470342883823302e-06, + "loss": 0.6921, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9142830868671395, + "eval_loss": 0.8011919856071472, + "eval_precision": 0.9112303017075651, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.8833, + "eval_samples_per_second": 457.676, + "eval_steps_per_second": 3.679, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.2528020486492668e-06, + "loss": 0.6925, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 4.4250422004359235e-07, + "loss": 0.6943, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9246119733924612, + "eval_f1": 0.9210302061521183, + "eval_loss": 0.7939128279685974, + "eval_precision": 0.919001696254973, + "eval_recall": 0.9246119733924612, + "eval_runtime": 7.9937, + "eval_samples_per_second": 451.358, + "eval_steps_per_second": 3.628, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 4.416907903081578e-08, + "loss": 0.6882, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9125667738801371, + "eval_loss": 0.8062209486961365, + "eval_precision": 0.9108347477984143, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.6698, + "eval_samples_per_second": 470.415, + "eval_steps_per_second": 3.781, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.49975930387655326, + "learning_rate": 0.00014234053032319, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-bby12rkd/checkpoint-630/training_args.bin b/run-bby12rkd/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8700cb056558c8137c9c3cdfa80db5e0d7edfe49 --- /dev/null +++ b/run-bby12rkd/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35f6efd6d394394c556a96b1b6be2a98bff9787600cafbe2fd849f30239f4585 +size 4792 diff --git a/run-c2tfrkgd/checkpoint-1260/model.safetensors b/run-c2tfrkgd/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e33b177a107c99ddabb2ad455cccc2928ab8e117 --- /dev/null +++ b/run-c2tfrkgd/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7d1402e1ef7640eb817cd1e7218743c38ff7419ce1784abc50c37fa41cfa846 +size 198025308 diff --git a/run-c2tfrkgd/checkpoint-1260/optimizer.pt b/run-c2tfrkgd/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..af6f834c83828e7689baed5aa9a683b208be8440 --- /dev/null +++ b/run-c2tfrkgd/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f83c743f77dff75853d6afefbc701a7de341890e391b8bb9036bd922e7e1cb9e +size 395900602 diff --git a/run-c2tfrkgd/checkpoint-1260/rng_state.pth b/run-c2tfrkgd/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-c2tfrkgd/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-c2tfrkgd/checkpoint-1260/scheduler.pt b/run-c2tfrkgd/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..73e2a2fef3c09d63f638d1b6f49ac8435418e9d4 --- /dev/null +++ b/run-c2tfrkgd/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:269fc3cff4cd0e366f264cb0ce336bfa1ac14f2a4396294d7475ec1d054b7717 +size 1064 diff --git a/run-c2tfrkgd/checkpoint-1260/trainer_state.json b/run-c2tfrkgd/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3147a4eada13599d6dd4aa6b0b384e7d5cb302b5 --- /dev/null +++ b/run-c2tfrkgd/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9246119733924612, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-c2tfrkgd/checkpoint-977", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.0794009379974673e-05, + "loss": 1.3714, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8367516629711752, + "eval_loss": 0.9335779547691345, + "eval_runtime": 6.7521, + "eval_samples_per_second": 534.354, + "eval_steps_per_second": 8.442, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 4.1588018759949346e-05, + "loss": 0.9546, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 6.238202813992403e-05, + "loss": 0.8522, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8132358193397522, + "eval_runtime": 6.7901, + "eval_samples_per_second": 531.361, + "eval_steps_per_second": 8.395, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 8.317603751989869e-05, + "loss": 0.8113, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9065964523281597, + "eval_loss": 0.8222821354866028, + "eval_runtime": 6.8073, + "eval_samples_per_second": 530.022, + "eval_steps_per_second": 8.373, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00010397004689987338, + "loss": 0.7951, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00012476405627984805, + "loss": 0.7831, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9068736141906873, + "eval_loss": 0.8678393959999084, + "eval_runtime": 6.8015, + "eval_samples_per_second": 530.471, + "eval_steps_per_second": 8.38, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00014555806565982274, + "loss": 0.7788, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00016635207503979738, + "loss": 0.7752, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8021356463432312, + "eval_runtime": 6.7337, + "eval_samples_per_second": 535.81, + "eval_steps_per_second": 8.465, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00018714608441977208, + "loss": 0.7686, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9002217294900222, + "eval_loss": 0.8268948197364807, + "eval_runtime": 6.7951, + "eval_samples_per_second": 530.971, + "eval_steps_per_second": 8.388, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.000195024392952394, + "loss": 0.7623, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00019432211465431328, + "loss": 0.7515, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8011595010757446, + "eval_runtime": 6.6584, + "eval_samples_per_second": 541.872, + "eval_steps_per_second": 8.561, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00019299484058765264, + "loss": 0.7576, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00019105114479634638, + "loss": 0.7482, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8145611882209778, + "eval_runtime": 6.6255, + "eval_samples_per_second": 544.563, + "eval_steps_per_second": 8.603, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00018850358334061477, + "loss": 0.7372, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8924611973392461, + "eval_loss": 0.8415437936782837, + "eval_runtime": 6.7936, + "eval_samples_per_second": 531.089, + "eval_steps_per_second": 8.39, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.000185368613186201, + "loss": 0.7427, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00018166648589418963, + "loss": 0.7383, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9077050997782705, + "eval_loss": 0.8149128556251526, + "eval_runtime": 6.6284, + "eval_samples_per_second": 544.328, + "eval_steps_per_second": 8.599, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00017742111679815763, + "loss": 0.7338, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8098114728927612, + "eval_runtime": 6.7556, + "eval_samples_per_second": 534.076, + "eval_steps_per_second": 8.437, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00017265993051376, + "loss": 0.7257, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00016741368377874027, + "loss": 0.7235, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8138856887817383, + "eval_runtime": 6.5164, + "eval_samples_per_second": 553.681, + "eval_steps_per_second": 8.747, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00016171626676780043, + "loss": 0.7251, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.000155604484165814, + "loss": 0.7221, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8091408610343933, + "eval_runtime": 6.6935, + "eval_samples_per_second": 539.03, + "eval_steps_per_second": 8.516, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00014911781741362503, + "loss": 0.7171, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9002217294900222, + "eval_loss": 0.8231325745582581, + "eval_runtime": 6.5149, + "eval_samples_per_second": 553.807, + "eval_steps_per_second": 8.749, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00014229816966229917, + "loss": 0.7217, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00013518959508339415, + "loss": 0.7105, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.810154139995575, + "eval_runtime": 6.4313, + "eval_samples_per_second": 561.01, + "eval_steps_per_second": 8.863, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00012783801428387513, + "loss": 0.7113, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00012029091766406365, + "loss": 0.7088, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9079822616407982, + "eval_loss": 0.8208918571472168, + "eval_runtime": 6.8431, + "eval_samples_per_second": 527.244, + "eval_steps_per_second": 8.33, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00011259705863489463, + "loss": 0.7093, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8121762275695801, + "eval_runtime": 6.7258, + "eval_samples_per_second": 536.439, + "eval_steps_per_second": 8.475, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00010480613867626428, + "loss": 0.7018, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 9.696848627095721e-05, + "loss": 0.7015, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8108313679695129, + "eval_runtime": 6.6607, + "eval_samples_per_second": 541.685, + "eval_steps_per_second": 8.558, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 8.913473178820412e-05, + "loss": 0.7016, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 8.135548041708633e-05, + "loss": 0.7019, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8144015073776245, + "eval_runtime": 6.7953, + "eval_samples_per_second": 530.956, + "eval_steps_per_second": 8.388, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 7.368098526260147e-05, + "loss": 0.7068, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8046088814735413, + "eval_runtime": 6.7578, + "eval_samples_per_second": 533.903, + "eval_steps_per_second": 8.435, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 6.616082271615306e-05, + "loss": 0.6961, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 5.884357219753575e-05, + "loss": 0.694, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.808207631111145, + "eval_runtime": 6.7326, + "eval_samples_per_second": 535.902, + "eval_steps_per_second": 8.466, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 5.177650233724663e-05, + "loss": 0.6918, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8090046048164368, + "eval_runtime": 6.7557, + "eval_samples_per_second": 534.07, + "eval_steps_per_second": 8.437, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 4.500526562635087e-05, + "loss": 0.6922, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 3.857360350642985e-05, + "loss": 0.6923, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9246119733924612, + "eval_loss": 0.8001089692115784, + "eval_runtime": 6.8153, + "eval_samples_per_second": 529.397, + "eval_steps_per_second": 8.364, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 3.252306380469745e-05, + "loss": 0.6917, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 2.689273233962359e-05, + "loss": 0.692, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9218403547671841, + "eval_loss": 0.7973278164863586, + "eval_runtime": 6.5801, + "eval_samples_per_second": 548.321, + "eval_steps_per_second": 8.662, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 2.171898043086191e-05, + "loss": 0.6874, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8056667447090149, + "eval_runtime": 6.9859, + "eval_samples_per_second": 516.468, + "eval_steps_per_second": 8.159, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.7035229944540563e-05, + "loss": 0.6885, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 1.2871737391696719e-05, + "loss": 0.6867, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9235033259423503, + "eval_loss": 0.8018417358398438, + "eval_runtime": 6.5582, + "eval_samples_per_second": 550.153, + "eval_steps_per_second": 8.691, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 9.255398474555834e-06, + "loss": 0.6891, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 6.209574343264626e-06, + "loss": 0.6918, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.8045929074287415, + "eval_runtime": 6.481, + "eval_samples_per_second": 556.707, + "eval_steps_per_second": 8.795, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 3.7539406854403066e-06, + "loss": 0.688, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9221175166297118, + "eval_loss": 0.799360454082489, + "eval_runtime": 6.7492, + "eval_samples_per_second": 534.584, + "eval_steps_per_second": 8.445, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.904360623400833e-06, + "loss": 0.6839, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 6.727822401460873e-07, + "loss": 0.6852, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8041695952415466, + "eval_runtime": 6.791, + "eval_samples_per_second": 531.294, + "eval_steps_per_second": 8.394, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 6.716139606080874e-08, + "loss": 0.6888, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9246119733924612, + "eval_loss": 0.7969141006469727, + "eval_runtime": 6.5918, + "eval_samples_per_second": 547.344, + "eval_steps_per_second": 8.647, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00019514378033514695, + "metric": "eval/loss", + "warmup_ratio": 0.19314588133184352 + } +} diff --git a/run-c2tfrkgd/checkpoint-1260/training_args.bin b/run-c2tfrkgd/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4f16006bddd8c2bfebbf435bdbaffb24acc479f2 --- /dev/null +++ b/run-c2tfrkgd/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12a648f3869dfdfbafeb08e11e12c048e5120b30def7247feb881775125b3116 +size 4792 diff --git a/run-c2tfrkgd/checkpoint-977/model.safetensors b/run-c2tfrkgd/checkpoint-977/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca1b937963a7d58ebb8388c306dc7b83943881e5 --- /dev/null +++ b/run-c2tfrkgd/checkpoint-977/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8346e13b560f58a8b42586f9e565e46ac94be31eaabb37b54811f485552a0e2 +size 198025308 diff --git a/run-c2tfrkgd/checkpoint-977/optimizer.pt b/run-c2tfrkgd/checkpoint-977/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0ff0a2207a723fe4bc63b231175a531253e8c30 --- /dev/null +++ b/run-c2tfrkgd/checkpoint-977/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95ae595d965ac09b1b8ca6977b0575ea37c7bf91df7e5c706b2e52108fbd11ea +size 395900602 diff --git a/run-c2tfrkgd/checkpoint-977/rng_state.pth b/run-c2tfrkgd/checkpoint-977/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..64040e071cfd2b36efe8671a5c39c1cd45bfe765 --- /dev/null +++ b/run-c2tfrkgd/checkpoint-977/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4edcf0c243ed806f9d2d5b881df10dc45598da934166b759eaaf7ff61a7accc3 +size 14244 diff --git a/run-c2tfrkgd/checkpoint-977/scheduler.pt b/run-c2tfrkgd/checkpoint-977/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ef30ea7744936fa9ea4c7751d466fc537f6acd4 --- /dev/null +++ b/run-c2tfrkgd/checkpoint-977/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b5094d32f0565fd8b3043530f3f234df15f6663712c14b11c736127bdf1617d +size 1064 diff --git a/run-c2tfrkgd/checkpoint-977/trainer_state.json b/run-c2tfrkgd/checkpoint-977/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cce700235237d3163612ecccae6884332f045d1b --- /dev/null +++ b/run-c2tfrkgd/checkpoint-977/trainer_state.json @@ -0,0 +1,456 @@ +{ + "best_metric": 0.9246119733924612, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-c2tfrkgd/checkpoint-977", + "epoch": 22.988235294117647, + "eval_steps": 500, + "global_step": 977, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.0794009379974673e-05, + "loss": 1.3714, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8367516629711752, + "eval_loss": 0.9335779547691345, + "eval_runtime": 6.7521, + "eval_samples_per_second": 534.354, + "eval_steps_per_second": 8.442, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 4.1588018759949346e-05, + "loss": 0.9546, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 6.238202813992403e-05, + "loss": 0.8522, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8132358193397522, + "eval_runtime": 6.7901, + "eval_samples_per_second": 531.361, + "eval_steps_per_second": 8.395, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 8.317603751989869e-05, + "loss": 0.8113, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9065964523281597, + "eval_loss": 0.8222821354866028, + "eval_runtime": 6.8073, + "eval_samples_per_second": 530.022, + "eval_steps_per_second": 8.373, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00010397004689987338, + "loss": 0.7951, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00012476405627984805, + "loss": 0.7831, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9068736141906873, + "eval_loss": 0.8678393959999084, + "eval_runtime": 6.8015, + "eval_samples_per_second": 530.471, + "eval_steps_per_second": 8.38, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00014555806565982274, + "loss": 0.7788, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00016635207503979738, + "loss": 0.7752, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8021356463432312, + "eval_runtime": 6.7337, + "eval_samples_per_second": 535.81, + "eval_steps_per_second": 8.465, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00018714608441977208, + "loss": 0.7686, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9002217294900222, + "eval_loss": 0.8268948197364807, + "eval_runtime": 6.7951, + "eval_samples_per_second": 530.971, + "eval_steps_per_second": 8.388, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.000195024392952394, + "loss": 0.7623, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00019432211465431328, + "loss": 0.7515, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8011595010757446, + "eval_runtime": 6.6584, + "eval_samples_per_second": 541.872, + "eval_steps_per_second": 8.561, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00019299484058765264, + "loss": 0.7576, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00019105114479634638, + "loss": 0.7482, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8145611882209778, + "eval_runtime": 6.6255, + "eval_samples_per_second": 544.563, + "eval_steps_per_second": 8.603, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00018850358334061477, + "loss": 0.7372, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8924611973392461, + "eval_loss": 0.8415437936782837, + "eval_runtime": 6.7936, + "eval_samples_per_second": 531.089, + "eval_steps_per_second": 8.39, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.000185368613186201, + "loss": 0.7427, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00018166648589418963, + "loss": 0.7383, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9077050997782705, + "eval_loss": 0.8149128556251526, + "eval_runtime": 6.6284, + "eval_samples_per_second": 544.328, + "eval_steps_per_second": 8.599, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00017742111679815763, + "loss": 0.7338, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8098114728927612, + "eval_runtime": 6.7556, + "eval_samples_per_second": 534.076, + "eval_steps_per_second": 8.437, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00017265993051376, + "loss": 0.7257, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00016741368377874027, + "loss": 0.7235, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8138856887817383, + "eval_runtime": 6.5164, + "eval_samples_per_second": 553.681, + "eval_steps_per_second": 8.747, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00016171626676780043, + "loss": 0.7251, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.000155604484165814, + "loss": 0.7221, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8091408610343933, + "eval_runtime": 6.6935, + "eval_samples_per_second": 539.03, + "eval_steps_per_second": 8.516, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00014911781741362503, + "loss": 0.7171, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9002217294900222, + "eval_loss": 0.8231325745582581, + "eval_runtime": 6.5149, + "eval_samples_per_second": 553.807, + "eval_steps_per_second": 8.749, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00014229816966229917, + "loss": 0.7217, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00013518959508339415, + "loss": 0.7105, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.810154139995575, + "eval_runtime": 6.4313, + "eval_samples_per_second": 561.01, + "eval_steps_per_second": 8.863, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00012783801428387513, + "loss": 0.7113, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00012029091766406365, + "loss": 0.7088, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9079822616407982, + "eval_loss": 0.8208918571472168, + "eval_runtime": 6.8431, + "eval_samples_per_second": 527.244, + "eval_steps_per_second": 8.33, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00011259705863489463, + "loss": 0.7093, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8121762275695801, + "eval_runtime": 6.7258, + "eval_samples_per_second": 536.439, + "eval_steps_per_second": 8.475, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00010480613867626428, + "loss": 0.7018, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 9.696848627095721e-05, + "loss": 0.7015, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8108313679695129, + "eval_runtime": 6.6607, + "eval_samples_per_second": 541.685, + "eval_steps_per_second": 8.558, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 8.913473178820412e-05, + "loss": 0.7016, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 8.135548041708633e-05, + "loss": 0.7019, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8144015073776245, + "eval_runtime": 6.7953, + "eval_samples_per_second": 530.956, + "eval_steps_per_second": 8.388, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 7.368098526260147e-05, + "loss": 0.7068, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8046088814735413, + "eval_runtime": 6.7578, + "eval_samples_per_second": 533.903, + "eval_steps_per_second": 8.435, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 6.616082271615306e-05, + "loss": 0.6961, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 5.884357219753575e-05, + "loss": 0.694, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.808207631111145, + "eval_runtime": 6.7326, + "eval_samples_per_second": 535.902, + "eval_steps_per_second": 8.466, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 5.177650233724663e-05, + "loss": 0.6918, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8090046048164368, + "eval_runtime": 6.7557, + "eval_samples_per_second": 534.07, + "eval_steps_per_second": 8.437, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 4.500526562635087e-05, + "loss": 0.6922, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 3.857360350642985e-05, + "loss": 0.6923, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9246119733924612, + "eval_loss": 0.8001089692115784, + "eval_runtime": 6.8153, + "eval_samples_per_second": 529.397, + "eval_steps_per_second": 8.364, + "step": 977 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00019514378033514695, + "metric": "eval/loss", + "warmup_ratio": 0.19314588133184352 + } +} diff --git a/run-c2tfrkgd/checkpoint-977/training_args.bin b/run-c2tfrkgd/checkpoint-977/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4f16006bddd8c2bfebbf435bdbaffb24acc479f2 --- /dev/null +++ b/run-c2tfrkgd/checkpoint-977/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12a648f3869dfdfbafeb08e11e12c048e5120b30def7247feb881775125b3116 +size 4792 diff --git a/run-cg9vdyfg/checkpoint-1232/model.safetensors b/run-cg9vdyfg/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f389ad48f5149d9b6174c7c14315e6f948f8c6b --- /dev/null +++ b/run-cg9vdyfg/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9bb128ed20cd82f6a780dada062b32eb1182ae0b9c4f6e2c3054070461e92ab +size 198025308 diff --git a/run-cg9vdyfg/checkpoint-1232/optimizer.pt b/run-cg9vdyfg/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a04a24ad59e511a7a4c43f02d1a8aacf6a44274 --- /dev/null +++ b/run-cg9vdyfg/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c28c6370b79a66032e7013a3fedded0de92633c5ef6aa33823ad24a3be06c3c +size 395900602 diff --git a/run-cg9vdyfg/checkpoint-1232/rng_state.pth b/run-cg9vdyfg/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-cg9vdyfg/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-cg9vdyfg/checkpoint-1232/scheduler.pt b/run-cg9vdyfg/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..32f8a1180072d32a02a8d86963f4bec516fe02b8 --- /dev/null +++ b/run-cg9vdyfg/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14764abcb0aec1afe99e0b2b423d7f05bec45a7df791b7349cfa6424aced2bd9 +size 1064 diff --git a/run-cg9vdyfg/checkpoint-1232/trainer_state.json b/run-cg9vdyfg/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..855f118d983156cba503caf37bf61f63cd26bdda --- /dev/null +++ b/run-cg9vdyfg/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9204545454545454, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-cg9vdyfg/checkpoint-1190", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.434697759321085e-05, + "loss": 1.2999, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8733370288248337, + "eval_loss": 0.9329584836959839, + "eval_runtime": 6.8445, + "eval_samples_per_second": 527.141, + "eval_steps_per_second": 8.328, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 6.86939551864217e-05, + "loss": 0.922, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00010304093277963255, + "loss": 0.8301, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8110005259513855, + "eval_runtime": 6.8898, + "eval_samples_per_second": 523.676, + "eval_steps_per_second": 8.273, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001373879103728434, + "loss": 0.8046, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8930155210643016, + "eval_loss": 0.8447400331497192, + "eval_runtime": 6.5622, + "eval_samples_per_second": 549.814, + "eval_steps_per_second": 8.686, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00017173488796605425, + "loss": 0.7948, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0002060818655592651, + "loss": 0.7824, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8921840354767184, + "eval_loss": 0.8460314273834229, + "eval_runtime": 6.9594, + "eval_samples_per_second": 518.439, + "eval_steps_per_second": 8.19, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00024042884315247595, + "loss": 0.7786, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0002747758207456868, + "loss": 0.7737, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9016075388026608, + "eval_loss": 0.8295722007751465, + "eval_runtime": 6.6468, + "eval_samples_per_second": 542.814, + "eval_steps_per_second": 8.575, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00030912279833889765, + "loss": 0.7727, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8949556541019955, + "eval_loss": 0.8446621298789978, + "eval_runtime": 6.8878, + "eval_samples_per_second": 523.827, + "eval_steps_per_second": 8.276, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0003434697759321085, + "loss": 0.7739, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00037781675352531935, + "loss": 0.7646, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8475609756097561, + "eval_loss": 0.9197834134101868, + "eval_runtime": 6.8425, + "eval_samples_per_second": 527.293, + "eval_steps_per_second": 8.33, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0004121637311185302, + "loss": 0.7759, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.000446510708711741, + "loss": 0.7667, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9007760532150776, + "eval_loss": 0.8361256122589111, + "eval_runtime": 6.7843, + "eval_samples_per_second": 531.815, + "eval_steps_per_second": 8.402, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0004688539189816653, + "loss": 0.7668, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8741685144124168, + "eval_loss": 0.8895032405853271, + "eval_runtime": 6.6878, + "eval_samples_per_second": 539.491, + "eval_steps_per_second": 8.523, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00046723977419569517, + "loss": 0.785, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00046373086776333415, + "loss": 0.772, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8852549889135255, + "eval_loss": 0.8673021197319031, + "eval_runtime": 6.9748, + "eval_samples_per_second": 517.294, + "eval_steps_per_second": 8.172, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00045835576415874266, + "loss": 0.7644, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8949556541019955, + "eval_loss": 0.8388532400131226, + "eval_runtime": 6.8433, + "eval_samples_per_second": 527.229, + "eval_steps_per_second": 8.329, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00045115821975188466, + "loss": 0.7586, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0004421968266070375, + "loss": 0.7599, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8888580931263859, + "eval_loss": 0.8525446653366089, + "eval_runtime": 6.4574, + "eval_samples_per_second": 558.741, + "eval_steps_per_second": 8.827, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0004315445355103968, + "loss": 0.7599, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00041928806210959945, + "loss": 0.7525, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8874722838137472, + "eval_loss": 0.8753782510757446, + "eval_runtime": 6.7853, + "eval_samples_per_second": 531.742, + "eval_steps_per_second": 8.401, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0004055271809995239, + "loss": 0.7517, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8902439024390244, + "eval_loss": 0.849846363067627, + "eval_runtime": 6.8284, + "eval_samples_per_second": 528.38, + "eval_steps_per_second": 8.347, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0003903739135009067, + "loss": 0.7538, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003739516157437141, + "loss": 0.7434, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8952328159645233, + "eval_loss": 0.8409960865974426, + "eval_runtime": 6.8263, + "eval_samples_per_second": 528.544, + "eval_steps_per_second": 8.35, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0003563939744787853, + "loss": 0.7426, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0003378439187924033, + "loss": 0.7355, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8910753880266076, + "eval_loss": 0.8489724397659302, + "eval_runtime": 6.6033, + "eval_samples_per_second": 546.396, + "eval_steps_per_second": 8.632, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0003184524565830524, + "loss": 0.7392, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8968957871396895, + "eval_loss": 0.8424054384231567, + "eval_runtime": 6.8192, + "eval_samples_per_second": 529.09, + "eval_steps_per_second": 8.359, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0002983774452720942, + "loss": 0.7325, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0002777823067554717, + "loss": 0.7269, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.891629711751663, + "eval_loss": 0.8401074409484863, + "eval_runtime": 6.7753, + "eval_samples_per_second": 532.522, + "eval_steps_per_second": 8.413, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0002568346970574557, + "loss": 0.7182, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.0002357051415162042, + "loss": 0.7169, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.905210643015521, + "eval_loss": 0.8330404162406921, + "eval_runtime": 6.4769, + "eval_samples_per_second": 557.056, + "eval_steps_per_second": 8.8, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0002145656466114889, + "loss": 0.7207, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9010532150776053, + "eval_loss": 0.8300851583480835, + "eval_runtime": 6.8194, + "eval_samples_per_second": 529.076, + "eval_steps_per_second": 8.358, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00019358829973509727, + "loss": 0.7112, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00017294386830256326, + "loss": 0.7091, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9099223946784922, + "eval_loss": 0.8198403120040894, + "eval_runtime": 6.7498, + "eval_samples_per_second": 534.534, + "eval_steps_per_second": 8.445, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.0001528004096102495, + "loss": 0.704, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8218609094619751, + "eval_runtime": 6.7248, + "eval_samples_per_second": 536.524, + "eval_steps_per_second": 8.476, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0001333219027543244, + "loss": 0.709, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.0001146669137485862, + "loss": 0.7015, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.809196412563324, + "eval_runtime": 6.8863, + "eval_samples_per_second": 523.938, + "eval_steps_per_second": 8.277, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 9.698730470782459e-05, + "loss": 0.7011, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 8.042699760469577e-05, + "loss": 0.6978, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8120192885398865, + "eval_runtime": 6.627, + "eval_samples_per_second": 544.439, + "eval_steps_per_second": 8.601, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 6.512080266382531e-05, + "loss": 0.696, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.8116884827613831, + "eval_runtime": 6.6425, + "eval_samples_per_second": 543.166, + "eval_steps_per_second": 8.581, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 5.1193320930670185e-05, + "loss": 0.6953, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 3.875792994884849e-05, + "loss": 0.6931, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8188387751579285, + "eval_runtime": 6.672, + "eval_samples_per_second": 540.77, + "eval_steps_per_second": 8.543, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 2.7915860803093363e-05, + "loss": 0.6905, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.8755374041222165e-05, + "loss": 0.6957, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8074535131454468, + "eval_runtime": 6.7549, + "eval_samples_per_second": 534.129, + "eval_steps_per_second": 8.438, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.1351041183578949e-05, + "loss": 0.6906, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.8062577843666077, + "eval_runtime": 6.9758, + "eval_samples_per_second": 517.217, + "eval_steps_per_second": 8.171, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 5.763137668869398e-06, + "loss": 0.6855, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 2.0371521781494553e-06, + "loss": 0.6879, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8047950863838196, + "eval_runtime": 6.6828, + "eval_samples_per_second": 539.891, + "eval_steps_per_second": 8.529, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.000468968347907302, + "metric": "eval/loss", + "warmup_ratio": 0.28097147522173666 + } +} diff --git a/run-cg9vdyfg/checkpoint-1232/training_args.bin b/run-cg9vdyfg/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7356f692a647382895ac55d424515eab17a18dfd --- /dev/null +++ b/run-cg9vdyfg/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caf97bc60205909d36af7836b6ee4c611d2b56c91c8bd062ad1d8b7beab40d18 +size 4792 diff --git a/run-cg9vdyfg/checkpoint-1260/model.safetensors b/run-cg9vdyfg/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75d66e6f6f2240332965fa7045216c0f681c88a9 --- /dev/null +++ b/run-cg9vdyfg/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7e4272ae350a3163cf7533e24b17f636cd2df1c6cce5d8f38f04de7905d61ea +size 198025308 diff --git a/run-cg9vdyfg/checkpoint-1260/optimizer.pt b/run-cg9vdyfg/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c52047003440f584bfc38286634f89dd03ab24b --- /dev/null +++ b/run-cg9vdyfg/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898742b29d78673feaad61d0a2d04a82bd88d4526ebee65c22ad434b2c4c699d +size 395900602 diff --git a/run-cg9vdyfg/checkpoint-1260/rng_state.pth b/run-cg9vdyfg/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-cg9vdyfg/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-cg9vdyfg/checkpoint-1260/scheduler.pt b/run-cg9vdyfg/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fca6be3643ede33e88c2895b5434f5bffde8b630 --- /dev/null +++ b/run-cg9vdyfg/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:003f4a53fd98598e3b0fc18a60c2948036de839f7fc09a6935cbc2740965e71c +size 1064 diff --git a/run-cg9vdyfg/checkpoint-1260/trainer_state.json b/run-cg9vdyfg/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..84487d847ade377f6503f4601ff77b7a60d9360c --- /dev/null +++ b/run-cg9vdyfg/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9207317073170732, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-cg9vdyfg/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.434697759321085e-05, + "loss": 1.2999, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8733370288248337, + "eval_loss": 0.9329584836959839, + "eval_runtime": 6.8445, + "eval_samples_per_second": 527.141, + "eval_steps_per_second": 8.328, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 6.86939551864217e-05, + "loss": 0.922, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00010304093277963255, + "loss": 0.8301, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8110005259513855, + "eval_runtime": 6.8898, + "eval_samples_per_second": 523.676, + "eval_steps_per_second": 8.273, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001373879103728434, + "loss": 0.8046, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8930155210643016, + "eval_loss": 0.8447400331497192, + "eval_runtime": 6.5622, + "eval_samples_per_second": 549.814, + "eval_steps_per_second": 8.686, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00017173488796605425, + "loss": 0.7948, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0002060818655592651, + "loss": 0.7824, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8921840354767184, + "eval_loss": 0.8460314273834229, + "eval_runtime": 6.9594, + "eval_samples_per_second": 518.439, + "eval_steps_per_second": 8.19, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00024042884315247595, + "loss": 0.7786, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0002747758207456868, + "loss": 0.7737, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9016075388026608, + "eval_loss": 0.8295722007751465, + "eval_runtime": 6.6468, + "eval_samples_per_second": 542.814, + "eval_steps_per_second": 8.575, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00030912279833889765, + "loss": 0.7727, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8949556541019955, + "eval_loss": 0.8446621298789978, + "eval_runtime": 6.8878, + "eval_samples_per_second": 523.827, + "eval_steps_per_second": 8.276, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0003434697759321085, + "loss": 0.7739, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00037781675352531935, + "loss": 0.7646, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8475609756097561, + "eval_loss": 0.9197834134101868, + "eval_runtime": 6.8425, + "eval_samples_per_second": 527.293, + "eval_steps_per_second": 8.33, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0004121637311185302, + "loss": 0.7759, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.000446510708711741, + "loss": 0.7667, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9007760532150776, + "eval_loss": 0.8361256122589111, + "eval_runtime": 6.7843, + "eval_samples_per_second": 531.815, + "eval_steps_per_second": 8.402, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0004688539189816653, + "loss": 0.7668, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8741685144124168, + "eval_loss": 0.8895032405853271, + "eval_runtime": 6.6878, + "eval_samples_per_second": 539.491, + "eval_steps_per_second": 8.523, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00046723977419569517, + "loss": 0.785, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00046373086776333415, + "loss": 0.772, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8852549889135255, + "eval_loss": 0.8673021197319031, + "eval_runtime": 6.9748, + "eval_samples_per_second": 517.294, + "eval_steps_per_second": 8.172, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00045835576415874266, + "loss": 0.7644, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8949556541019955, + "eval_loss": 0.8388532400131226, + "eval_runtime": 6.8433, + "eval_samples_per_second": 527.229, + "eval_steps_per_second": 8.329, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00045115821975188466, + "loss": 0.7586, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0004421968266070375, + "loss": 0.7599, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8888580931263859, + "eval_loss": 0.8525446653366089, + "eval_runtime": 6.4574, + "eval_samples_per_second": 558.741, + "eval_steps_per_second": 8.827, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0004315445355103968, + "loss": 0.7599, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00041928806210959945, + "loss": 0.7525, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8874722838137472, + "eval_loss": 0.8753782510757446, + "eval_runtime": 6.7853, + "eval_samples_per_second": 531.742, + "eval_steps_per_second": 8.401, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0004055271809995239, + "loss": 0.7517, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8902439024390244, + "eval_loss": 0.849846363067627, + "eval_runtime": 6.8284, + "eval_samples_per_second": 528.38, + "eval_steps_per_second": 8.347, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0003903739135009067, + "loss": 0.7538, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003739516157437141, + "loss": 0.7434, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8952328159645233, + "eval_loss": 0.8409960865974426, + "eval_runtime": 6.8263, + "eval_samples_per_second": 528.544, + "eval_steps_per_second": 8.35, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0003563939744787853, + "loss": 0.7426, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0003378439187924033, + "loss": 0.7355, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8910753880266076, + "eval_loss": 0.8489724397659302, + "eval_runtime": 6.6033, + "eval_samples_per_second": 546.396, + "eval_steps_per_second": 8.632, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0003184524565830524, + "loss": 0.7392, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8968957871396895, + "eval_loss": 0.8424054384231567, + "eval_runtime": 6.8192, + "eval_samples_per_second": 529.09, + "eval_steps_per_second": 8.359, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0002983774452720942, + "loss": 0.7325, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0002777823067554717, + "loss": 0.7269, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.891629711751663, + "eval_loss": 0.8401074409484863, + "eval_runtime": 6.7753, + "eval_samples_per_second": 532.522, + "eval_steps_per_second": 8.413, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0002568346970574557, + "loss": 0.7182, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.0002357051415162042, + "loss": 0.7169, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.905210643015521, + "eval_loss": 0.8330404162406921, + "eval_runtime": 6.4769, + "eval_samples_per_second": 557.056, + "eval_steps_per_second": 8.8, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0002145656466114889, + "loss": 0.7207, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9010532150776053, + "eval_loss": 0.8300851583480835, + "eval_runtime": 6.8194, + "eval_samples_per_second": 529.076, + "eval_steps_per_second": 8.358, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00019358829973509727, + "loss": 0.7112, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00017294386830256326, + "loss": 0.7091, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9099223946784922, + "eval_loss": 0.8198403120040894, + "eval_runtime": 6.7498, + "eval_samples_per_second": 534.534, + "eval_steps_per_second": 8.445, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.0001528004096102495, + "loss": 0.704, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8218609094619751, + "eval_runtime": 6.7248, + "eval_samples_per_second": 536.524, + "eval_steps_per_second": 8.476, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0001333219027543244, + "loss": 0.709, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.0001146669137485862, + "loss": 0.7015, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.809196412563324, + "eval_runtime": 6.8863, + "eval_samples_per_second": 523.938, + "eval_steps_per_second": 8.277, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 9.698730470782459e-05, + "loss": 0.7011, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 8.042699760469577e-05, + "loss": 0.6978, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8120192885398865, + "eval_runtime": 6.627, + "eval_samples_per_second": 544.439, + "eval_steps_per_second": 8.601, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 6.512080266382531e-05, + "loss": 0.696, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.8116884827613831, + "eval_runtime": 6.6425, + "eval_samples_per_second": 543.166, + "eval_steps_per_second": 8.581, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 5.1193320930670185e-05, + "loss": 0.6953, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 3.875792994884849e-05, + "loss": 0.6931, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8188387751579285, + "eval_runtime": 6.672, + "eval_samples_per_second": 540.77, + "eval_steps_per_second": 8.543, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 2.7915860803093363e-05, + "loss": 0.6905, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.8755374041222165e-05, + "loss": 0.6957, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8074535131454468, + "eval_runtime": 6.7549, + "eval_samples_per_second": 534.129, + "eval_steps_per_second": 8.438, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.1351041183578949e-05, + "loss": 0.6906, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.8062577843666077, + "eval_runtime": 6.9758, + "eval_samples_per_second": 517.217, + "eval_steps_per_second": 8.171, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 5.763137668869398e-06, + "loss": 0.6855, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 2.0371521781494553e-06, + "loss": 0.6879, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8047950863838196, + "eval_runtime": 6.6828, + "eval_samples_per_second": 539.891, + "eval_steps_per_second": 8.529, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 2.0341633134782212e-07, + "loss": 0.6927, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9207317073170732, + "eval_loss": 0.8050925135612488, + "eval_runtime": 6.7784, + "eval_samples_per_second": 532.283, + "eval_steps_per_second": 8.409, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.000468968347907302, + "metric": "eval/loss", + "warmup_ratio": 0.28097147522173666 + } +} diff --git a/run-cg9vdyfg/checkpoint-1260/training_args.bin b/run-cg9vdyfg/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7356f692a647382895ac55d424515eab17a18dfd --- /dev/null +++ b/run-cg9vdyfg/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caf97bc60205909d36af7836b6ee4c611d2b56c91c8bd062ad1d8b7beab40d18 +size 4792 diff --git a/run-cibg17i7/checkpoint-1232/model.safetensors b/run-cibg17i7/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a6e466556b45ec29e0394ab81f13d72806098bb --- /dev/null +++ b/run-cibg17i7/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ca8fce394d2b2224b49d1cbbca82079f0dcdc939e81f3aed088cee3d2a57cab +size 198025308 diff --git a/run-cibg17i7/checkpoint-1232/optimizer.pt b/run-cibg17i7/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8df8f163d2d33541b10349b8cb2f5fc5fa71fc3 --- /dev/null +++ b/run-cibg17i7/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afc9d4d2148def6e5eac0b81d328126bd777e6141eac9f55955360a6888fe87d +size 395900602 diff --git a/run-cibg17i7/checkpoint-1232/rng_state.pth b/run-cibg17i7/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-cibg17i7/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-cibg17i7/checkpoint-1232/scheduler.pt b/run-cibg17i7/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d57e492a55a5699f0c5bd8431f951876ecb7887 --- /dev/null +++ b/run-cibg17i7/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:015821d5d1c1e2e76365c26356a157dcae96349f03a1b928ffa2c449817c235e +size 1064 diff --git a/run-cibg17i7/checkpoint-1232/trainer_state.json b/run-cibg17i7/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0cda855a1735e10dc63c47b0298e2e3009c2b39d --- /dev/null +++ b/run-cibg17i7/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9204545454545454, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-cibg17i7/checkpoint-765", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 8.389061688106677e-07, + "loss": 1.5309, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.5762195121951219, + "eval_loss": 1.4715656042099, + "eval_runtime": 6.869, + "eval_samples_per_second": 525.255, + "eval_steps_per_second": 8.298, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 1.6778123376213353e-06, + "loss": 1.4823, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 2.516718506432003e-06, + "loss": 1.3884, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.811529933481153, + "eval_loss": 1.2890413999557495, + "eval_runtime": 7.0609, + "eval_samples_per_second": 510.984, + "eval_steps_per_second": 8.073, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 3.3556246752426706e-06, + "loss": 1.2555, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8284368070953437, + "eval_loss": 1.043358564376831, + "eval_runtime": 6.8448, + "eval_samples_per_second": 527.112, + "eval_steps_per_second": 8.327, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 4.194530844053338e-06, + "loss": 1.1021, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 5.033437012864006e-06, + "loss": 0.9887, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.832039911308204, + "eval_loss": 0.9410680532455444, + "eval_runtime": 6.7516, + "eval_samples_per_second": 534.389, + "eval_steps_per_second": 8.442, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 5.872343181674674e-06, + "loss": 0.939, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 6.711249350485341e-06, + "loss": 0.9078, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8639135254988913, + "eval_loss": 0.8866722583770752, + "eval_runtime": 6.8225, + "eval_samples_per_second": 528.837, + "eval_steps_per_second": 8.355, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 7.550155519296009e-06, + "loss": 0.8882, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8783259423503326, + "eval_loss": 0.8676007390022278, + "eval_runtime": 6.5247, + "eval_samples_per_second": 552.977, + "eval_steps_per_second": 8.736, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 8.389061688106677e-06, + "loss": 0.8652, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 9.227967856917344e-06, + "loss": 0.8507, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8980044345898004, + "eval_loss": 0.83982253074646, + "eval_runtime": 6.8661, + "eval_samples_per_second": 525.477, + "eval_steps_per_second": 8.302, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 1.0034580911785706e-05, + "loss": 0.8493, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 1.0014580009489772e-05, + "loss": 0.8268, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8221571445465088, + "eval_runtime": 6.558, + "eval_samples_per_second": 550.171, + "eval_steps_per_second": 8.692, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 9.957581025507598e-06, + "loss": 0.8141, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.8168759346008301, + "eval_runtime": 6.7883, + "eval_samples_per_second": 531.5, + "eval_steps_per_second": 8.397, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 9.864005960374397e-06, + "loss": 0.8189, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 9.73454761113338e-06, + "loss": 0.8064, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8084390759468079, + "eval_runtime": 6.9707, + "eval_samples_per_second": 517.596, + "eval_steps_per_second": 8.177, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 9.57016444210884e-06, + "loss": 0.7965, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8022477030754089, + "eval_runtime": 6.9167, + "eval_samples_per_second": 521.635, + "eval_steps_per_second": 8.241, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 9.372073488771622e-06, + "loss": 0.7964, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 9.141741347234306e-06, + "loss": 0.7931, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8069414496421814, + "eval_runtime": 6.6539, + "eval_samples_per_second": 542.238, + "eval_steps_per_second": 8.566, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 8.880873316086713e-06, + "loss": 0.7922, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 8.591400770961764e-06, + "loss": 0.7884, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8003499507904053, + "eval_runtime": 6.7618, + "eval_samples_per_second": 533.587, + "eval_steps_per_second": 8.43, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 8.275466865306032e-06, + "loss": 0.7819, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8009287118911743, + "eval_runtime": 6.8644, + "eval_samples_per_second": 525.614, + "eval_steps_per_second": 8.304, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 7.935410663221447e-06, + "loss": 0.7932, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 7.57374982185304e-06, + "loss": 0.781, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9118625277161863, + "eval_loss": 0.8007851243019104, + "eval_runtime": 6.9252, + "eval_samples_per_second": 520.995, + "eval_steps_per_second": 8.231, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 7.193161951536315e-06, + "loss": 0.7869, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 6.796464791707113e-06, + "loss": 0.7738, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8029119372367859, + "eval_runtime": 6.7214, + "eval_samples_per_second": 536.796, + "eval_steps_per_second": 8.48, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 6.386595349344687e-06, + "loss": 0.7825, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8008227944374084, + "eval_runtime": 6.9521, + "eval_samples_per_second": 518.981, + "eval_steps_per_second": 8.199, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 5.966588154399588e-06, + "loss": 0.7785, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 5.539552793195601e-06, + "loss": 0.7772, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.7937043905258179, + "eval_runtime": 6.8843, + "eval_samples_per_second": 524.088, + "eval_steps_per_second": 8.28, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 5.108650886140554e-06, + "loss": 0.7707, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 4.677072680194972e-06, + "loss": 0.774, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8021281957626343, + "eval_runtime": 7.0722, + "eval_samples_per_second": 510.17, + "eval_steps_per_second": 8.06, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 4.248013429399745e-06, + "loss": 0.784, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.793916642665863, + "eval_runtime": 6.7073, + "eval_samples_per_second": 537.923, + "eval_steps_per_second": 8.498, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 3.824649738333138e-06, + "loss": 0.773, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 3.410116043641886e-06, + "loss": 0.7675, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.7989597320556641, + "eval_runtime": 6.9819, + "eval_samples_per_second": 516.766, + "eval_steps_per_second": 8.164, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 3.007481407768904e-06, + "loss": 0.7712, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.7985460758209229, + "eval_runtime": 6.7551, + "eval_samples_per_second": 534.117, + "eval_steps_per_second": 8.438, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 2.6197267966887325e-06, + "loss": 0.7717, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 2.249723009878407e-06, + "loss": 0.7681, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.7943069338798523, + "eval_runtime": 6.9587, + "eval_samples_per_second": 518.491, + "eval_steps_per_second": 8.191, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 1.9002094259225356e-06, + "loss": 0.7725, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 1.5737737211126852e-06, + "loss": 0.7707, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.7946570515632629, + "eval_runtime": 6.6828, + "eval_samples_per_second": 539.895, + "eval_steps_per_second": 8.529, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.2728327111974638e-06, + "loss": 0.7712, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.7998667359352112, + "eval_runtime": 6.6843, + "eval_samples_per_second": 539.774, + "eval_steps_per_second": 8.527, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 9.996144581242867e-07, + "loss": 0.765, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 7.561417742482552e-07, + "loss": 0.7682, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.7984577417373657, + "eval_runtime": 6.624, + "eval_samples_per_second": 544.682, + "eval_steps_per_second": 8.605, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 5.44217246137189e-07, + "loss": 0.7693, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.6540988885135367e-07, + "loss": 0.7689, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.7970194220542908, + "eval_runtime": 6.8064, + "eval_samples_per_second": 530.089, + "eval_steps_per_second": 8.374, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.2104352950490452e-07, + "loss": 0.77, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.7927056550979614, + "eval_runtime": 6.8944, + "eval_samples_per_second": 523.325, + "eval_steps_per_second": 8.268, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.1218700611309713e-07, + "loss": 0.7636, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 3.964625428957748e-08, + "loss": 0.7601, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.793843686580658, + "eval_runtime": 7.0851, + "eval_samples_per_second": 509.239, + "eval_steps_per_second": 8.045, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 1.0034608403850678e-05, + "metric": "eval/loss", + "warmup_ratio": 0.2467313745853508 + } +} diff --git a/run-cibg17i7/checkpoint-1232/training_args.bin b/run-cibg17i7/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..180d7009785546d52c5eccd50af6db4b33bc1268 --- /dev/null +++ b/run-cibg17i7/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:398b254ba345cabf0e5641a1df1a28ceb0f7269e053a2a50b68349d9eaa56c22 +size 4792 diff --git a/run-cibg17i7/checkpoint-1260/model.safetensors b/run-cibg17i7/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..196f27c1f5b2578dcbb5c8ab08d1dba6d9ae5472 --- /dev/null +++ b/run-cibg17i7/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a63c934d9e38d6a8e9d801080997513df1c5f7597040fd475b7a452e252d64c7 +size 198025308 diff --git a/run-cibg17i7/checkpoint-1260/optimizer.pt b/run-cibg17i7/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec3cfa7a25eb253e983f9c7e825623a21d57960f --- /dev/null +++ b/run-cibg17i7/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb9be55f65e3fab0e68d58f4be0e9baa572f4c81431081d0a44eee2f5348eb12 +size 395900602 diff --git a/run-cibg17i7/checkpoint-1260/rng_state.pth b/run-cibg17i7/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-cibg17i7/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-cibg17i7/checkpoint-1260/scheduler.pt b/run-cibg17i7/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e59d619adfe8a8f9134cb2bb5a15d613ab74d458 --- /dev/null +++ b/run-cibg17i7/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f04a7c10278662f0022e09f148799a49557892c2a1c28478465b4ce8deb88dcd +size 1064 diff --git a/run-cibg17i7/checkpoint-1260/trainer_state.json b/run-cibg17i7/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d602d2235350bf5888a95dc27654454b7dbf3d5e --- /dev/null +++ b/run-cibg17i7/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9218403547671841, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-cibg17i7/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 8.389061688106677e-07, + "loss": 1.5309, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.5762195121951219, + "eval_loss": 1.4715656042099, + "eval_runtime": 6.869, + "eval_samples_per_second": 525.255, + "eval_steps_per_second": 8.298, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 1.6778123376213353e-06, + "loss": 1.4823, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 2.516718506432003e-06, + "loss": 1.3884, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.811529933481153, + "eval_loss": 1.2890413999557495, + "eval_runtime": 7.0609, + "eval_samples_per_second": 510.984, + "eval_steps_per_second": 8.073, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 3.3556246752426706e-06, + "loss": 1.2555, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8284368070953437, + "eval_loss": 1.043358564376831, + "eval_runtime": 6.8448, + "eval_samples_per_second": 527.112, + "eval_steps_per_second": 8.327, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 4.194530844053338e-06, + "loss": 1.1021, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 5.033437012864006e-06, + "loss": 0.9887, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.832039911308204, + "eval_loss": 0.9410680532455444, + "eval_runtime": 6.7516, + "eval_samples_per_second": 534.389, + "eval_steps_per_second": 8.442, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 5.872343181674674e-06, + "loss": 0.939, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 6.711249350485341e-06, + "loss": 0.9078, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8639135254988913, + "eval_loss": 0.8866722583770752, + "eval_runtime": 6.8225, + "eval_samples_per_second": 528.837, + "eval_steps_per_second": 8.355, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 7.550155519296009e-06, + "loss": 0.8882, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8783259423503326, + "eval_loss": 0.8676007390022278, + "eval_runtime": 6.5247, + "eval_samples_per_second": 552.977, + "eval_steps_per_second": 8.736, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 8.389061688106677e-06, + "loss": 0.8652, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 9.227967856917344e-06, + "loss": 0.8507, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8980044345898004, + "eval_loss": 0.83982253074646, + "eval_runtime": 6.8661, + "eval_samples_per_second": 525.477, + "eval_steps_per_second": 8.302, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 1.0034580911785706e-05, + "loss": 0.8493, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 1.0014580009489772e-05, + "loss": 0.8268, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8221571445465088, + "eval_runtime": 6.558, + "eval_samples_per_second": 550.171, + "eval_steps_per_second": 8.692, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 9.957581025507598e-06, + "loss": 0.8141, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.8168759346008301, + "eval_runtime": 6.7883, + "eval_samples_per_second": 531.5, + "eval_steps_per_second": 8.397, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 9.864005960374397e-06, + "loss": 0.8189, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 9.73454761113338e-06, + "loss": 0.8064, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8084390759468079, + "eval_runtime": 6.9707, + "eval_samples_per_second": 517.596, + "eval_steps_per_second": 8.177, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 9.57016444210884e-06, + "loss": 0.7965, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8022477030754089, + "eval_runtime": 6.9167, + "eval_samples_per_second": 521.635, + "eval_steps_per_second": 8.241, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 9.372073488771622e-06, + "loss": 0.7964, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 9.141741347234306e-06, + "loss": 0.7931, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8069414496421814, + "eval_runtime": 6.6539, + "eval_samples_per_second": 542.238, + "eval_steps_per_second": 8.566, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 8.880873316086713e-06, + "loss": 0.7922, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 8.591400770961764e-06, + "loss": 0.7884, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8003499507904053, + "eval_runtime": 6.7618, + "eval_samples_per_second": 533.587, + "eval_steps_per_second": 8.43, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 8.275466865306032e-06, + "loss": 0.7819, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8009287118911743, + "eval_runtime": 6.8644, + "eval_samples_per_second": 525.614, + "eval_steps_per_second": 8.304, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 7.935410663221447e-06, + "loss": 0.7932, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 7.57374982185304e-06, + "loss": 0.781, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9118625277161863, + "eval_loss": 0.8007851243019104, + "eval_runtime": 6.9252, + "eval_samples_per_second": 520.995, + "eval_steps_per_second": 8.231, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 7.193161951536315e-06, + "loss": 0.7869, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 6.796464791707113e-06, + "loss": 0.7738, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8029119372367859, + "eval_runtime": 6.7214, + "eval_samples_per_second": 536.796, + "eval_steps_per_second": 8.48, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 6.386595349344687e-06, + "loss": 0.7825, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8008227944374084, + "eval_runtime": 6.9521, + "eval_samples_per_second": 518.981, + "eval_steps_per_second": 8.199, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 5.966588154399588e-06, + "loss": 0.7785, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 5.539552793195601e-06, + "loss": 0.7772, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.7937043905258179, + "eval_runtime": 6.8843, + "eval_samples_per_second": 524.088, + "eval_steps_per_second": 8.28, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 5.108650886140554e-06, + "loss": 0.7707, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 4.677072680194972e-06, + "loss": 0.774, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8021281957626343, + "eval_runtime": 7.0722, + "eval_samples_per_second": 510.17, + "eval_steps_per_second": 8.06, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 4.248013429399745e-06, + "loss": 0.784, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.793916642665863, + "eval_runtime": 6.7073, + "eval_samples_per_second": 537.923, + "eval_steps_per_second": 8.498, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 3.824649738333138e-06, + "loss": 0.773, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 3.410116043641886e-06, + "loss": 0.7675, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.7989597320556641, + "eval_runtime": 6.9819, + "eval_samples_per_second": 516.766, + "eval_steps_per_second": 8.164, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 3.007481407768904e-06, + "loss": 0.7712, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.7985460758209229, + "eval_runtime": 6.7551, + "eval_samples_per_second": 534.117, + "eval_steps_per_second": 8.438, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 2.6197267966887325e-06, + "loss": 0.7717, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 2.249723009878407e-06, + "loss": 0.7681, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.7943069338798523, + "eval_runtime": 6.9587, + "eval_samples_per_second": 518.491, + "eval_steps_per_second": 8.191, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 1.9002094259225356e-06, + "loss": 0.7725, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 1.5737737211126852e-06, + "loss": 0.7707, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.7946570515632629, + "eval_runtime": 6.6828, + "eval_samples_per_second": 539.895, + "eval_steps_per_second": 8.529, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.2728327111974638e-06, + "loss": 0.7712, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.7998667359352112, + "eval_runtime": 6.6843, + "eval_samples_per_second": 539.774, + "eval_steps_per_second": 8.527, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 9.996144581242867e-07, + "loss": 0.765, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 7.561417742482552e-07, + "loss": 0.7682, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.7984577417373657, + "eval_runtime": 6.624, + "eval_samples_per_second": 544.682, + "eval_steps_per_second": 8.605, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 5.44217246137189e-07, + "loss": 0.7693, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.6540988885135367e-07, + "loss": 0.7689, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.7970194220542908, + "eval_runtime": 6.8064, + "eval_samples_per_second": 530.089, + "eval_steps_per_second": 8.374, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.2104352950490452e-07, + "loss": 0.77, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.7927056550979614, + "eval_runtime": 6.8944, + "eval_samples_per_second": 523.325, + "eval_steps_per_second": 8.268, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.1218700611309713e-07, + "loss": 0.7636, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 3.964625428957748e-08, + "loss": 0.7601, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.793843686580658, + "eval_runtime": 7.0851, + "eval_samples_per_second": 509.239, + "eval_steps_per_second": 8.045, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 3.958340381072146e-09, + "loss": 0.771, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9218403547671841, + "eval_loss": 0.7912683486938477, + "eval_runtime": 6.8899, + "eval_samples_per_second": 523.661, + "eval_steps_per_second": 8.273, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 1.0034608403850678e-05, + "metric": "eval/loss", + "warmup_ratio": 0.2467313745853508 + } +} diff --git a/run-cibg17i7/checkpoint-1260/training_args.bin b/run-cibg17i7/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..180d7009785546d52c5eccd50af6db4b33bc1268 --- /dev/null +++ b/run-cibg17i7/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:398b254ba345cabf0e5641a1df1a28ceb0f7269e053a2a50b68349d9eaa56c22 +size 4792 diff --git a/run-cpz6d1iz/checkpoint-616/model.safetensors b/run-cpz6d1iz/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf369197483a78cb11c62b6cbbbc6821fa5fdbd3 --- /dev/null +++ b/run-cpz6d1iz/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a9892b074de13148115b69f3769d0022195e4b653ce891bb9241db943c1172e +size 198025308 diff --git a/run-cpz6d1iz/checkpoint-616/optimizer.pt b/run-cpz6d1iz/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1d76f9442b2fdaa224ee8a55aa9dc37144c8e9d --- /dev/null +++ b/run-cpz6d1iz/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b15391136bc04fc62377694f059e4401bbf7b8f57715d27ce3419633b8840706 +size 395900602 diff --git a/run-cpz6d1iz/checkpoint-616/rng_state.pth b/run-cpz6d1iz/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-cpz6d1iz/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-cpz6d1iz/checkpoint-616/scheduler.pt b/run-cpz6d1iz/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..abed55d3a4112c1a30804fb94d43a843a4a54af0 --- /dev/null +++ b/run-cpz6d1iz/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0465b20521d502f9adcb4a6fa7d0aeb3be3066e1b0332acd2e5560792dd00cb1 +size 1064 diff --git a/run-cpz6d1iz/checkpoint-616/trainer_state.json b/run-cpz6d1iz/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..52801c410f827a63cac3cdc1b2cabc6e7e1a8e26 --- /dev/null +++ b/run-cpz6d1iz/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9180412395863843, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-cpz6d1iz/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 9.337297572513965e-06, + "loss": 1.4974, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8245565410199557, + "eval_f1": 0.7504652622410692, + "eval_loss": 1.1778662204742432, + "eval_precision": 0.7010977125485033, + "eval_recall": 0.8245565410199557, + "eval_runtime": 7.9368, + "eval_samples_per_second": 454.594, + "eval_steps_per_second": 3.654, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 1.867459514502793e-05, + "loss": 1.2529, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 2.8011892717541894e-05, + "loss": 0.9843, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8323170731707317, + "eval_f1": 0.7607906844449553, + "eval_loss": 0.9312257170677185, + "eval_precision": 0.7813901035438382, + "eval_recall": 0.8323170731707317, + "eval_runtime": 7.8479, + "eval_samples_per_second": 459.738, + "eval_steps_per_second": 3.695, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 3.734919029005586e-05, + "loss": 0.9036, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8852549889135255, + "eval_f1": 0.8727382252421404, + "eval_loss": 0.8863803744316101, + "eval_precision": 0.8818471384989105, + "eval_recall": 0.8852549889135255, + "eval_runtime": 8.2029, + "eval_samples_per_second": 439.844, + "eval_steps_per_second": 3.535, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 4.668648786256983e-05, + "loss": 0.8702, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 5.602378543508379e-05, + "loss": 0.8272, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9016044442281315, + "eval_loss": 0.827029824256897, + "eval_precision": 0.8977767175054678, + "eval_recall": 0.9088137472283814, + "eval_runtime": 8.6469, + "eval_samples_per_second": 417.259, + "eval_steps_per_second": 3.354, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 6.536108300759776e-05, + "loss": 0.8124, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 7.469838058011172e-05, + "loss": 0.791, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8896895787139689, + "eval_f1": 0.8908302243934083, + "eval_loss": 0.8426142930984497, + "eval_precision": 0.8952544356380175, + "eval_recall": 0.8896895787139689, + "eval_runtime": 7.9673, + "eval_samples_per_second": 452.854, + "eval_steps_per_second": 3.64, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 8.403567815262568e-05, + "loss": 0.7742, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9077050997782705, + "eval_f1": 0.8952158275122053, + "eval_loss": 0.8156972527503967, + "eval_precision": 0.900683213811247, + "eval_recall": 0.9077050997782705, + "eval_runtime": 8.0605, + "eval_samples_per_second": 447.614, + "eval_steps_per_second": 3.598, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 9.048589657902734e-05, + "loss": 0.7771, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 9.02461453820045e-05, + "loss": 0.7696, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9089808097871447, + "eval_loss": 0.8019198775291443, + "eval_precision": 0.9083537029648736, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.2664, + "eval_samples_per_second": 436.464, + "eval_steps_per_second": 3.508, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 8.971109434044089e-05, + "loss": 0.7627, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 8.888425487558681e-05, + "loss": 0.7579, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9056360787135247, + "eval_loss": 0.8119347095489502, + "eval_precision": 0.9037596807714011, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.3286, + "eval_samples_per_second": 433.208, + "eval_steps_per_second": 3.482, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 8.777105335136256e-05, + "loss": 0.753, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.905016737540752, + "eval_loss": 0.8058480024337769, + "eval_precision": 0.9043267295510783, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.8299, + "eval_samples_per_second": 460.796, + "eval_steps_per_second": 3.704, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 8.637879546233632e-05, + "loss": 0.7467, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 8.471661828807179e-05, + "loss": 0.7414, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8910753880266076, + "eval_f1": 0.8925899353200606, + "eval_loss": 0.8334646821022034, + "eval_precision": 0.8965909503064864, + "eval_recall": 0.8910753880266076, + "eval_runtime": 8.1268, + "eval_samples_per_second": 443.961, + "eval_steps_per_second": 3.568, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 8.279543032850277e-05, + "loss": 0.736, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.899390243902439, + "eval_f1": 0.9002456226563228, + "eval_loss": 0.8323202133178711, + "eval_precision": 0.903732584843102, + "eval_recall": 0.899390243902439, + "eval_runtime": 7.848, + "eval_samples_per_second": 459.738, + "eval_steps_per_second": 3.695, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 8.062783991386883e-05, + "loss": 0.7351, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 7.822807245904186e-05, + "loss": 0.7312, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9086965051403931, + "eval_loss": 0.802578330039978, + "eval_precision": 0.9043713569516466, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.9576, + "eval_samples_per_second": 453.405, + "eval_steps_per_second": 3.644, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 7.561187710528515e-05, + "loss": 0.7311, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 7.279642336213425e-05, + "loss": 0.722, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9054878048780488, + "eval_f1": 0.9053749893268752, + "eval_loss": 0.8218502402305603, + "eval_precision": 0.9072092893262743, + "eval_recall": 0.9054878048780488, + "eval_runtime": 8.5776, + "eval_samples_per_second": 420.629, + "eval_steps_per_second": 3.381, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 6.980018842771594e-05, + "loss": 0.728, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9088337129199915, + "eval_loss": 0.803844690322876, + "eval_precision": 0.9053003849300292, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.4963, + "eval_samples_per_second": 424.653, + "eval_steps_per_second": 3.413, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 6.664283592699743e-05, + "loss": 0.7174, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 6.33450868637792e-05, + "loss": 0.7178, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9115339219530667, + "eval_loss": 0.8020056486129761, + "eval_precision": 0.9126518511003248, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.0058, + "eval_samples_per_second": 450.671, + "eval_steps_per_second": 3.622, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 5.99285836333455e-05, + "loss": 0.7168, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 5.6415747988226656e-05, + "loss": 0.7181, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9074858536331818, + "eval_loss": 0.8075320720672607, + "eval_precision": 0.9090067831913919, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.2679, + "eval_samples_per_second": 436.387, + "eval_steps_per_second": 3.508, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 5.282963388921312e-05, + "loss": 0.7132, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9105576927569372, + "eval_loss": 0.8095524311065674, + "eval_precision": 0.9105897373648217, + "eval_recall": 0.9115853658536586, + "eval_runtime": 8.3192, + "eval_samples_per_second": 433.694, + "eval_steps_per_second": 3.486, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 4.919377620732666e-05, + "loss": 0.7122, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 4.553203626968464e-05, + "loss": 0.7102, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9109325265921849, + "eval_loss": 0.8063324093818665, + "eval_precision": 0.909426612792062, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.1988, + "eval_samples_per_second": 440.063, + "eval_steps_per_second": 3.537, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 4.1868445262905336e-05, + "loss": 0.7107, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 3.8227046521762955e-05, + "loss": 0.7057, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9116152740130952, + "eval_loss": 0.8127285838127136, + "eval_precision": 0.9140500451639162, + "eval_recall": 0.9124168514412417, + "eval_runtime": 8.2949, + "eval_samples_per_second": 434.968, + "eval_steps_per_second": 3.496, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 3.463173773811715e-05, + "loss": 0.7102, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9124581241091632, + "eval_loss": 0.8046479225158691, + "eval_precision": 0.9094511683912293, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.4514, + "eval_samples_per_second": 426.911, + "eval_steps_per_second": 3.431, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 3.1106114125664475e-05, + "loss": 0.7031, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 2.7673313569787155e-05, + "loss": 0.6998, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9107592161617032, + "eval_loss": 0.8098756074905396, + "eval_precision": 0.9094635680966053, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.2362, + "eval_samples_per_second": 438.069, + "eval_steps_per_second": 3.521, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 2.4355864778745852e-05, + "loss": 0.7051, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9142268993626627, + "eval_loss": 0.8050925731658936, + "eval_precision": 0.9130379000974386, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.3981, + "eval_samples_per_second": 429.62, + "eval_steps_per_second": 3.453, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.1175539432766786e-05, + "loss": 0.7013, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 1.8153209301335927e-05, + "loss": 0.6986, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9094601509445771, + "eval_loss": 0.8089693188667297, + "eval_precision": 0.9069934469200637, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.7255, + "eval_samples_per_second": 413.501, + "eval_steps_per_second": 3.324, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.5308709266407565e-05, + "loss": 0.6993, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.2660707150475642e-05, + "loss": 0.7004, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9157749931932022, + "eval_loss": 0.7969859838485718, + "eval_precision": 0.9146257718265792, + "eval_recall": 0.9190687361419069, + "eval_runtime": 7.8691, + "eval_samples_per_second": 458.501, + "eval_steps_per_second": 3.685, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.0226581203797468e-05, + "loss": 0.6992, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9128722156884925, + "eval_loss": 0.8054778575897217, + "eval_precision": 0.9123823991036795, + "eval_recall": 0.9187915742793792, + "eval_runtime": 8.3104, + "eval_samples_per_second": 434.157, + "eval_steps_per_second": 3.49, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 8.022306054793715e-06, + "loss": 0.6988, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 6.062347872107096e-06, + "loss": 0.6956, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9159544845078982, + "eval_loss": 0.7970059514045715, + "eval_precision": 0.9139707421362406, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.3576, + "eval_samples_per_second": 431.703, + "eval_steps_per_second": 3.47, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 4.359569426347826e-06, + "loss": 0.6955, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 2.925145674584495e-06, + "loss": 0.6941, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9158537735891641, + "eval_loss": 0.8026701807975769, + "eval_precision": 0.9141634537835975, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.2835, + "eval_samples_per_second": 435.562, + "eval_steps_per_second": 3.501, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.7684904215807802e-06, + "loss": 0.6973, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9105265775058081, + "eval_loss": 0.8031827807426453, + "eval_precision": 0.9092055837026697, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.0208, + "eval_samples_per_second": 449.829, + "eval_steps_per_second": 3.616, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 8.971945390837938e-07, + "loss": 0.698, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 3.169761486173364e-07, + "loss": 0.6969, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9180412395863843, + "eval_loss": 0.7970722913742065, + "eval_precision": 0.9152796171817729, + "eval_recall": 0.9223946784922394, + "eval_runtime": 8.4656, + "eval_samples_per_second": 426.196, + "eval_steps_per_second": 3.426, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.029883991666275297, + "learning_rate": 9.049996108744304e-05, + "metric": "eval/loss", + "weight_decay": 0.011787322184488629 + } +} diff --git a/run-cpz6d1iz/checkpoint-616/training_args.bin b/run-cpz6d1iz/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..07fc150a9c6de07e229802881b9611fa1a439874 --- /dev/null +++ b/run-cpz6d1iz/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c95ef0bca0717cbb9b0e686802505037a3eeb9f7ccb304a80c61f2716de896 +size 4792 diff --git a/run-cpz6d1iz/checkpoint-630/model.safetensors b/run-cpz6d1iz/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e5684b8958c2475357c025b2428a212b905a6f7 --- /dev/null +++ b/run-cpz6d1iz/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3466e921717344f64fef9ffef80cd9cbfce5e01047b8583a2b2f607b80cc0e0b +size 198025308 diff --git a/run-cpz6d1iz/checkpoint-630/optimizer.pt b/run-cpz6d1iz/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f918669c5a9a13ffbd51afb66e8ea735ea597178 --- /dev/null +++ b/run-cpz6d1iz/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:435771ec7b97798fccc1e1b7fc454f97701c9b11b2d0d581b395fc21b4a2214e +size 395900602 diff --git a/run-cpz6d1iz/checkpoint-630/rng_state.pth b/run-cpz6d1iz/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-cpz6d1iz/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-cpz6d1iz/checkpoint-630/scheduler.pt b/run-cpz6d1iz/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a12e5bf4c04ce553e03d76bdbf8cf08115ae74e --- /dev/null +++ b/run-cpz6d1iz/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b507fbf592ee45838ab7eb53c1772b880a5df47fb169d5494285fd858590aaa +size 1064 diff --git a/run-cpz6d1iz/checkpoint-630/trainer_state.json b/run-cpz6d1iz/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b543d4a21a71d324117e1bc04f350a31ad0a6456 --- /dev/null +++ b/run-cpz6d1iz/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9180412395863843, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-cpz6d1iz/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 9.337297572513965e-06, + "loss": 1.4974, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8245565410199557, + "eval_f1": 0.7504652622410692, + "eval_loss": 1.1778662204742432, + "eval_precision": 0.7010977125485033, + "eval_recall": 0.8245565410199557, + "eval_runtime": 7.9368, + "eval_samples_per_second": 454.594, + "eval_steps_per_second": 3.654, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 1.867459514502793e-05, + "loss": 1.2529, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 2.8011892717541894e-05, + "loss": 0.9843, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8323170731707317, + "eval_f1": 0.7607906844449553, + "eval_loss": 0.9312257170677185, + "eval_precision": 0.7813901035438382, + "eval_recall": 0.8323170731707317, + "eval_runtime": 7.8479, + "eval_samples_per_second": 459.738, + "eval_steps_per_second": 3.695, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 3.734919029005586e-05, + "loss": 0.9036, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8852549889135255, + "eval_f1": 0.8727382252421404, + "eval_loss": 0.8863803744316101, + "eval_precision": 0.8818471384989105, + "eval_recall": 0.8852549889135255, + "eval_runtime": 8.2029, + "eval_samples_per_second": 439.844, + "eval_steps_per_second": 3.535, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 4.668648786256983e-05, + "loss": 0.8702, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 5.602378543508379e-05, + "loss": 0.8272, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9016044442281315, + "eval_loss": 0.827029824256897, + "eval_precision": 0.8977767175054678, + "eval_recall": 0.9088137472283814, + "eval_runtime": 8.6469, + "eval_samples_per_second": 417.259, + "eval_steps_per_second": 3.354, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 6.536108300759776e-05, + "loss": 0.8124, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 7.469838058011172e-05, + "loss": 0.791, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8896895787139689, + "eval_f1": 0.8908302243934083, + "eval_loss": 0.8426142930984497, + "eval_precision": 0.8952544356380175, + "eval_recall": 0.8896895787139689, + "eval_runtime": 7.9673, + "eval_samples_per_second": 452.854, + "eval_steps_per_second": 3.64, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 8.403567815262568e-05, + "loss": 0.7742, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9077050997782705, + "eval_f1": 0.8952158275122053, + "eval_loss": 0.8156972527503967, + "eval_precision": 0.900683213811247, + "eval_recall": 0.9077050997782705, + "eval_runtime": 8.0605, + "eval_samples_per_second": 447.614, + "eval_steps_per_second": 3.598, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 9.048589657902734e-05, + "loss": 0.7771, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 9.02461453820045e-05, + "loss": 0.7696, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9089808097871447, + "eval_loss": 0.8019198775291443, + "eval_precision": 0.9083537029648736, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.2664, + "eval_samples_per_second": 436.464, + "eval_steps_per_second": 3.508, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 8.971109434044089e-05, + "loss": 0.7627, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 8.888425487558681e-05, + "loss": 0.7579, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9056360787135247, + "eval_loss": 0.8119347095489502, + "eval_precision": 0.9037596807714011, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.3286, + "eval_samples_per_second": 433.208, + "eval_steps_per_second": 3.482, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 8.777105335136256e-05, + "loss": 0.753, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.905016737540752, + "eval_loss": 0.8058480024337769, + "eval_precision": 0.9043267295510783, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.8299, + "eval_samples_per_second": 460.796, + "eval_steps_per_second": 3.704, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 8.637879546233632e-05, + "loss": 0.7467, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 8.471661828807179e-05, + "loss": 0.7414, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8910753880266076, + "eval_f1": 0.8925899353200606, + "eval_loss": 0.8334646821022034, + "eval_precision": 0.8965909503064864, + "eval_recall": 0.8910753880266076, + "eval_runtime": 8.1268, + "eval_samples_per_second": 443.961, + "eval_steps_per_second": 3.568, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 8.279543032850277e-05, + "loss": 0.736, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.899390243902439, + "eval_f1": 0.9002456226563228, + "eval_loss": 0.8323202133178711, + "eval_precision": 0.903732584843102, + "eval_recall": 0.899390243902439, + "eval_runtime": 7.848, + "eval_samples_per_second": 459.738, + "eval_steps_per_second": 3.695, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 8.062783991386883e-05, + "loss": 0.7351, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 7.822807245904186e-05, + "loss": 0.7312, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9086965051403931, + "eval_loss": 0.802578330039978, + "eval_precision": 0.9043713569516466, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.9576, + "eval_samples_per_second": 453.405, + "eval_steps_per_second": 3.644, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 7.561187710528515e-05, + "loss": 0.7311, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 7.279642336213425e-05, + "loss": 0.722, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9054878048780488, + "eval_f1": 0.9053749893268752, + "eval_loss": 0.8218502402305603, + "eval_precision": 0.9072092893262743, + "eval_recall": 0.9054878048780488, + "eval_runtime": 8.5776, + "eval_samples_per_second": 420.629, + "eval_steps_per_second": 3.381, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 6.980018842771594e-05, + "loss": 0.728, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9088337129199915, + "eval_loss": 0.803844690322876, + "eval_precision": 0.9053003849300292, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.4963, + "eval_samples_per_second": 424.653, + "eval_steps_per_second": 3.413, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 6.664283592699743e-05, + "loss": 0.7174, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 6.33450868637792e-05, + "loss": 0.7178, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9115339219530667, + "eval_loss": 0.8020056486129761, + "eval_precision": 0.9126518511003248, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.0058, + "eval_samples_per_second": 450.671, + "eval_steps_per_second": 3.622, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 5.99285836333455e-05, + "loss": 0.7168, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 5.6415747988226656e-05, + "loss": 0.7181, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9074858536331818, + "eval_loss": 0.8075320720672607, + "eval_precision": 0.9090067831913919, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.2679, + "eval_samples_per_second": 436.387, + "eval_steps_per_second": 3.508, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 5.282963388921312e-05, + "loss": 0.7132, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9105576927569372, + "eval_loss": 0.8095524311065674, + "eval_precision": 0.9105897373648217, + "eval_recall": 0.9115853658536586, + "eval_runtime": 8.3192, + "eval_samples_per_second": 433.694, + "eval_steps_per_second": 3.486, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 4.919377620732666e-05, + "loss": 0.7122, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 4.553203626968464e-05, + "loss": 0.7102, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9109325265921849, + "eval_loss": 0.8063324093818665, + "eval_precision": 0.909426612792062, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.1988, + "eval_samples_per_second": 440.063, + "eval_steps_per_second": 3.537, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 4.1868445262905336e-05, + "loss": 0.7107, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 3.8227046521762955e-05, + "loss": 0.7057, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9116152740130952, + "eval_loss": 0.8127285838127136, + "eval_precision": 0.9140500451639162, + "eval_recall": 0.9124168514412417, + "eval_runtime": 8.2949, + "eval_samples_per_second": 434.968, + "eval_steps_per_second": 3.496, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 3.463173773811715e-05, + "loss": 0.7102, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9124581241091632, + "eval_loss": 0.8046479225158691, + "eval_precision": 0.9094511683912293, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.4514, + "eval_samples_per_second": 426.911, + "eval_steps_per_second": 3.431, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 3.1106114125664475e-05, + "loss": 0.7031, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 2.7673313569787155e-05, + "loss": 0.6998, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9107592161617032, + "eval_loss": 0.8098756074905396, + "eval_precision": 0.9094635680966053, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.2362, + "eval_samples_per_second": 438.069, + "eval_steps_per_second": 3.521, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 2.4355864778745852e-05, + "loss": 0.7051, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9142268993626627, + "eval_loss": 0.8050925731658936, + "eval_precision": 0.9130379000974386, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.3981, + "eval_samples_per_second": 429.62, + "eval_steps_per_second": 3.453, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.1175539432766786e-05, + "loss": 0.7013, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 1.8153209301335927e-05, + "loss": 0.6986, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9094601509445771, + "eval_loss": 0.8089693188667297, + "eval_precision": 0.9069934469200637, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.7255, + "eval_samples_per_second": 413.501, + "eval_steps_per_second": 3.324, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.5308709266407565e-05, + "loss": 0.6993, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.2660707150475642e-05, + "loss": 0.7004, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9157749931932022, + "eval_loss": 0.7969859838485718, + "eval_precision": 0.9146257718265792, + "eval_recall": 0.9190687361419069, + "eval_runtime": 7.8691, + "eval_samples_per_second": 458.501, + "eval_steps_per_second": 3.685, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.0226581203797468e-05, + "loss": 0.6992, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9128722156884925, + "eval_loss": 0.8054778575897217, + "eval_precision": 0.9123823991036795, + "eval_recall": 0.9187915742793792, + "eval_runtime": 8.3104, + "eval_samples_per_second": 434.157, + "eval_steps_per_second": 3.49, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 8.022306054793715e-06, + "loss": 0.6988, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 6.062347872107096e-06, + "loss": 0.6956, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9159544845078982, + "eval_loss": 0.7970059514045715, + "eval_precision": 0.9139707421362406, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.3576, + "eval_samples_per_second": 431.703, + "eval_steps_per_second": 3.47, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 4.359569426347826e-06, + "loss": 0.6955, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 2.925145674584495e-06, + "loss": 0.6941, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9158537735891641, + "eval_loss": 0.8026701807975769, + "eval_precision": 0.9141634537835975, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.2835, + "eval_samples_per_second": 435.562, + "eval_steps_per_second": 3.501, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.7684904215807802e-06, + "loss": 0.6973, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9105265775058081, + "eval_loss": 0.8031827807426453, + "eval_precision": 0.9092055837026697, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.0208, + "eval_samples_per_second": 449.829, + "eval_steps_per_second": 3.616, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 8.971945390837938e-07, + "loss": 0.698, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 3.169761486173364e-07, + "loss": 0.6969, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9180412395863843, + "eval_loss": 0.7970722913742065, + "eval_precision": 0.9152796171817729, + "eval_recall": 0.9223946784922394, + "eval_runtime": 8.4656, + "eval_samples_per_second": 426.196, + "eval_steps_per_second": 3.426, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 3.1643094719280604e-08, + "loss": 0.6981, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9142339823689526, + "eval_loss": 0.804097056388855, + "eval_precision": 0.913375902362099, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.8761, + "eval_samples_per_second": 458.092, + "eval_steps_per_second": 3.682, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.029883991666275297, + "learning_rate": 9.049996108744304e-05, + "metric": "eval/loss", + "weight_decay": 0.011787322184488629 + } +} diff --git a/run-cpz6d1iz/checkpoint-630/training_args.bin b/run-cpz6d1iz/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..07fc150a9c6de07e229802881b9611fa1a439874 --- /dev/null +++ b/run-cpz6d1iz/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19c95ef0bca0717cbb9b0e686802505037a3eeb9f7ccb304a80c61f2716de896 +size 4792 diff --git a/run-cqu1wme3/checkpoint-1232/model.safetensors b/run-cqu1wme3/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..214219f4d215b886d6ebf11cf3acad0541afed5b --- /dev/null +++ b/run-cqu1wme3/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9def4d8ba537217d92afd3819905c3442481c0318b385eb803b46f4806aba389 +size 198025308 diff --git a/run-cqu1wme3/checkpoint-1232/optimizer.pt b/run-cqu1wme3/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a24a10aae5ae25339e28e8411595bf4e5864353b --- /dev/null +++ b/run-cqu1wme3/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6f7eaafa84cbee0fa4590d4a11e38b999e39ed41d250e4eab5d89639ec4bc9b +size 395900602 diff --git a/run-cqu1wme3/checkpoint-1232/rng_state.pth b/run-cqu1wme3/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-cqu1wme3/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-cqu1wme3/checkpoint-1232/scheduler.pt b/run-cqu1wme3/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab8c1b88598fc7a2cfaae36a72e7278c652e313e --- /dev/null +++ b/run-cqu1wme3/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:774afbd229f0ea02d0ebf64da903d7cdd2b485cde4a3d60cd79c41a3e25f8920 +size 1064 diff --git a/run-cqu1wme3/checkpoint-1232/trainer_state.json b/run-cqu1wme3/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2c49335f3f6199bd7cdad4c661d180ccd05a653a --- /dev/null +++ b/run-cqu1wme3/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9229490022172949, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-cqu1wme3/checkpoint-1020", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.1560595690756742e-05, + "loss": 1.3678, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8367516629711752, + "eval_loss": 0.9278929829597473, + "eval_runtime": 6.7956, + "eval_samples_per_second": 530.931, + "eval_steps_per_second": 8.388, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 4.3121191381513484e-05, + "loss": 0.9524, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 6.468178707227022e-05, + "loss": 0.8559, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.816893994808197, + "eval_runtime": 7.0223, + "eval_samples_per_second": 513.79, + "eval_steps_per_second": 8.117, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 8.624238276302697e-05, + "loss": 0.812, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8966186252771619, + "eval_loss": 0.839713990688324, + "eval_runtime": 6.7918, + "eval_samples_per_second": 531.23, + "eval_steps_per_second": 8.392, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001078029784537837, + "loss": 0.7972, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00012936357414454044, + "loss": 0.7848, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8289402723312378, + "eval_runtime": 7.0884, + "eval_samples_per_second": 509.003, + "eval_steps_per_second": 8.041, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00014677018216894644, + "loss": 0.7814, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001464813676224163, + "loss": 0.7745, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.7983811497688293, + "eval_runtime": 6.8009, + "eval_samples_per_second": 530.521, + "eval_steps_per_second": 8.381, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00014577697180081213, + "loss": 0.7652, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9016075388026608, + "eval_loss": 0.8263856172561646, + "eval_runtime": 6.9878, + "eval_samples_per_second": 516.328, + "eval_steps_per_second": 8.157, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0001446609996847611, + "loss": 0.7605, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00014313979635265258, + "loss": 0.7503, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8045262098312378, + "eval_runtime": 6.833, + "eval_samples_per_second": 528.029, + "eval_steps_per_second": 8.342, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00014122201090445023, + "loss": 0.7506, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001389185472855375, + "loss": 0.7424, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9035476718403548, + "eval_loss": 0.8193584680557251, + "eval_runtime": 6.7684, + "eval_samples_per_second": 533.063, + "eval_steps_per_second": 8.421, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00013624250229019694, + "loss": 0.7306, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8103824257850647, + "eval_runtime": 6.8876, + "eval_samples_per_second": 523.843, + "eval_steps_per_second": 8.276, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00013320909109721727, + "loss": 0.7395, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00012983556076100948, + "loss": 0.7294, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8181220889091492, + "eval_runtime": 6.8253, + "eval_samples_per_second": 528.621, + "eval_steps_per_second": 8.351, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0001261410921500954, + "loss": 0.7265, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8041738867759705, + "eval_runtime": 6.9707, + "eval_samples_per_second": 517.596, + "eval_steps_per_second": 8.177, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00012214669089051738, + "loss": 0.7236, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00011787506793423023, + "loss": 0.7158, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8077400922775269, + "eval_runtime": 6.8741, + "eval_samples_per_second": 524.872, + "eval_steps_per_second": 8.292, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00011335051043152884, + "loss": 0.7178, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00010859874364169069, + "loss": 0.7174, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8082053661346436, + "eval_runtime": 6.8548, + "eval_samples_per_second": 526.343, + "eval_steps_per_second": 8.315, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00010364678466696757, + "loss": 0.7126, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8127850890159607, + "eval_runtime": 6.907, + "eval_samples_per_second": 522.366, + "eval_steps_per_second": 8.252, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 9.852278884155077e-05, + "loss": 0.7157, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 9.32558896488951e-05, + "loss": 0.7051, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8272565603256226, + "eval_runtime": 6.566, + "eval_samples_per_second": 549.501, + "eval_steps_per_second": 8.681, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 8.787603307758365e-05, + "loss": 0.7082, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 8.241380735753505e-05, + "loss": 0.7026, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8113918900489807, + "eval_runtime": 6.8137, + "eval_samples_per_second": 529.52, + "eval_steps_per_second": 8.365, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 7.690026904462161e-05, + "loss": 0.7019, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9207317073170732, + "eval_loss": 0.8095203638076782, + "eval_runtime": 6.6575, + "eval_samples_per_second": 541.947, + "eval_steps_per_second": 8.562, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 7.136676644252854e-05, + "loss": 0.697, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 6.584476136582366e-05, + "loss": 0.701, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9223946784922394, + "eval_loss": 0.7983012199401855, + "eval_runtime": 6.9704, + "eval_samples_per_second": 517.615, + "eval_steps_per_second": 8.177, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 6.036565025763871e-05, + "loss": 0.6973, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 5.4960585679032726e-05, + "loss": 0.698, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8122344613075256, + "eval_runtime": 6.6709, + "eval_samples_per_second": 540.857, + "eval_steps_per_second": 8.545, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 4.9660299184994896e-05, + "loss": 0.705, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8036772608757019, + "eval_runtime": 6.6467, + "eval_samples_per_second": 542.829, + "eval_steps_per_second": 8.576, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 4.4494926594159986e-05, + "loss": 0.6941, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 3.949383664569964e-05, + "loss": 0.6936, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.8057708144187927, + "eval_runtime": 6.8537, + "eval_samples_per_second": 526.432, + "eval_steps_per_second": 8.317, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 3.468546401759446e-05, + "loss": 0.6921, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8063600659370422, + "eval_runtime": 7.1772, + "eval_samples_per_second": 502.706, + "eval_steps_per_second": 7.942, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 3.009714765569426e-05, + "loss": 0.694, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 2.5754975332778277e-05, + "loss": 0.6928, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8025647401809692, + "eval_runtime": 7.412, + "eval_samples_per_second": 486.777, + "eval_steps_per_second": 7.69, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 2.168363532140519e-05, + "loss": 0.6929, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 1.7906276023896578e-05, + "loss": 0.6904, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9229490022172949, + "eval_loss": 0.797930896282196, + "eval_runtime": 7.1216, + "eval_samples_per_second": 506.63, + "eval_steps_per_second": 8.004, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.4444374357554102e-05, + "loss": 0.6882, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.8026780486106873, + "eval_runtime": 6.8953, + "eval_samples_per_second": 523.259, + "eval_steps_per_second": 8.267, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.1317613643433368e-05, + "loss": 0.6883, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 8.543771692960346e-06, + "loss": 0.6882, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8103070855140686, + "eval_runtime": 6.8667, + "eval_samples_per_second": 525.432, + "eval_steps_per_second": 8.301, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 6.138619728697284e-06, + "loss": 0.6886, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 4.115832713962798e-06, + "loss": 0.692, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8019214272499084, + "eval_runtime": 6.7053, + "eval_samples_per_second": 538.083, + "eval_steps_per_second": 8.501, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.4869116011452495e-06, + "loss": 0.6894, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9223946784922394, + "eval_loss": 0.8019643425941467, + "eval_runtime": 6.8834, + "eval_samples_per_second": 524.158, + "eval_steps_per_second": 8.281, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.2611179407818806e-06, + "loss": 0.684, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 4.454212231962345e-07, + "loss": 0.6869, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9229490022172949, + "eval_loss": 0.7989235520362854, + "eval_runtime": 6.6346, + "eval_samples_per_second": 543.819, + "eval_steps_per_second": 8.591, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00014677790143322858, + "metric": "eval/loss", + "warmup_ratio": 0.14047536488599555 + } +} diff --git a/run-cqu1wme3/checkpoint-1232/training_args.bin b/run-cqu1wme3/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9bf0ec3ec23fc862ad5d6ce78232993524bc04fa --- /dev/null +++ b/run-cqu1wme3/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f94da8890f00d84861f6d0d9e291c128b145878ecb0ab058c4ae325d280d2310 +size 4792 diff --git a/run-cqu1wme3/checkpoint-1260/model.safetensors b/run-cqu1wme3/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52aed710f0b057903d1109ed8c42a6d0d3eb6bc2 --- /dev/null +++ b/run-cqu1wme3/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7122a52779074c12598e76c591b2b71d1bd08cccef49d41ee78ecd499c7d21f7 +size 198025308 diff --git a/run-cqu1wme3/checkpoint-1260/optimizer.pt b/run-cqu1wme3/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..776b63311985eaac4bc96c93d8d09994e64a3af7 --- /dev/null +++ b/run-cqu1wme3/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84198b757fc4602b6f10403ee401ae03891419f2b3ddb2a6527cb5cf20d21621 +size 395900602 diff --git a/run-cqu1wme3/checkpoint-1260/rng_state.pth b/run-cqu1wme3/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-cqu1wme3/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-cqu1wme3/checkpoint-1260/scheduler.pt b/run-cqu1wme3/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..380d11fb4613100a36a8ad6366f70df152d9dc34 --- /dev/null +++ b/run-cqu1wme3/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fd17d613860bde2c1bb1c8af7c9bdd3f7ef7d054961f140726714824ae59910 +size 1064 diff --git a/run-cqu1wme3/checkpoint-1260/trainer_state.json b/run-cqu1wme3/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ac9938150878abe81c02e37a12669521fc14fcc3 --- /dev/null +++ b/run-cqu1wme3/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9235033259423503, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-cqu1wme3/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.1560595690756742e-05, + "loss": 1.3678, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8367516629711752, + "eval_loss": 0.9278929829597473, + "eval_runtime": 6.7956, + "eval_samples_per_second": 530.931, + "eval_steps_per_second": 8.388, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 4.3121191381513484e-05, + "loss": 0.9524, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 6.468178707227022e-05, + "loss": 0.8559, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.816893994808197, + "eval_runtime": 7.0223, + "eval_samples_per_second": 513.79, + "eval_steps_per_second": 8.117, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 8.624238276302697e-05, + "loss": 0.812, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8966186252771619, + "eval_loss": 0.839713990688324, + "eval_runtime": 6.7918, + "eval_samples_per_second": 531.23, + "eval_steps_per_second": 8.392, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001078029784537837, + "loss": 0.7972, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00012936357414454044, + "loss": 0.7848, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8289402723312378, + "eval_runtime": 7.0884, + "eval_samples_per_second": 509.003, + "eval_steps_per_second": 8.041, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00014677018216894644, + "loss": 0.7814, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001464813676224163, + "loss": 0.7745, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.7983811497688293, + "eval_runtime": 6.8009, + "eval_samples_per_second": 530.521, + "eval_steps_per_second": 8.381, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00014577697180081213, + "loss": 0.7652, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9016075388026608, + "eval_loss": 0.8263856172561646, + "eval_runtime": 6.9878, + "eval_samples_per_second": 516.328, + "eval_steps_per_second": 8.157, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0001446609996847611, + "loss": 0.7605, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00014313979635265258, + "loss": 0.7503, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8045262098312378, + "eval_runtime": 6.833, + "eval_samples_per_second": 528.029, + "eval_steps_per_second": 8.342, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00014122201090445023, + "loss": 0.7506, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001389185472855375, + "loss": 0.7424, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9035476718403548, + "eval_loss": 0.8193584680557251, + "eval_runtime": 6.7684, + "eval_samples_per_second": 533.063, + "eval_steps_per_second": 8.421, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00013624250229019694, + "loss": 0.7306, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8103824257850647, + "eval_runtime": 6.8876, + "eval_samples_per_second": 523.843, + "eval_steps_per_second": 8.276, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00013320909109721727, + "loss": 0.7395, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00012983556076100948, + "loss": 0.7294, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8181220889091492, + "eval_runtime": 6.8253, + "eval_samples_per_second": 528.621, + "eval_steps_per_second": 8.351, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0001261410921500954, + "loss": 0.7265, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8041738867759705, + "eval_runtime": 6.9707, + "eval_samples_per_second": 517.596, + "eval_steps_per_second": 8.177, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00012214669089051738, + "loss": 0.7236, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00011787506793423023, + "loss": 0.7158, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8077400922775269, + "eval_runtime": 6.8741, + "eval_samples_per_second": 524.872, + "eval_steps_per_second": 8.292, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00011335051043152884, + "loss": 0.7178, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00010859874364169069, + "loss": 0.7174, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8082053661346436, + "eval_runtime": 6.8548, + "eval_samples_per_second": 526.343, + "eval_steps_per_second": 8.315, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00010364678466696757, + "loss": 0.7126, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8127850890159607, + "eval_runtime": 6.907, + "eval_samples_per_second": 522.366, + "eval_steps_per_second": 8.252, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 9.852278884155077e-05, + "loss": 0.7157, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 9.32558896488951e-05, + "loss": 0.7051, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8272565603256226, + "eval_runtime": 6.566, + "eval_samples_per_second": 549.501, + "eval_steps_per_second": 8.681, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 8.787603307758365e-05, + "loss": 0.7082, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 8.241380735753505e-05, + "loss": 0.7026, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8113918900489807, + "eval_runtime": 6.8137, + "eval_samples_per_second": 529.52, + "eval_steps_per_second": 8.365, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 7.690026904462161e-05, + "loss": 0.7019, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9207317073170732, + "eval_loss": 0.8095203638076782, + "eval_runtime": 6.6575, + "eval_samples_per_second": 541.947, + "eval_steps_per_second": 8.562, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 7.136676644252854e-05, + "loss": 0.697, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 6.584476136582366e-05, + "loss": 0.701, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9223946784922394, + "eval_loss": 0.7983012199401855, + "eval_runtime": 6.9704, + "eval_samples_per_second": 517.615, + "eval_steps_per_second": 8.177, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 6.036565025763871e-05, + "loss": 0.6973, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 5.4960585679032726e-05, + "loss": 0.698, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8122344613075256, + "eval_runtime": 6.6709, + "eval_samples_per_second": 540.857, + "eval_steps_per_second": 8.545, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 4.9660299184994896e-05, + "loss": 0.705, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8036772608757019, + "eval_runtime": 6.6467, + "eval_samples_per_second": 542.829, + "eval_steps_per_second": 8.576, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 4.4494926594159986e-05, + "loss": 0.6941, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 3.949383664569964e-05, + "loss": 0.6936, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.8057708144187927, + "eval_runtime": 6.8537, + "eval_samples_per_second": 526.432, + "eval_steps_per_second": 8.317, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 3.468546401759446e-05, + "loss": 0.6921, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8063600659370422, + "eval_runtime": 7.1772, + "eval_samples_per_second": 502.706, + "eval_steps_per_second": 7.942, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 3.009714765569426e-05, + "loss": 0.694, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 2.5754975332778277e-05, + "loss": 0.6928, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8025647401809692, + "eval_runtime": 7.412, + "eval_samples_per_second": 486.777, + "eval_steps_per_second": 7.69, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 2.168363532140519e-05, + "loss": 0.6929, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 1.7906276023896578e-05, + "loss": 0.6904, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9229490022172949, + "eval_loss": 0.797930896282196, + "eval_runtime": 7.1216, + "eval_samples_per_second": 506.63, + "eval_steps_per_second": 8.004, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.4444374357554102e-05, + "loss": 0.6882, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.8026780486106873, + "eval_runtime": 6.8953, + "eval_samples_per_second": 523.259, + "eval_steps_per_second": 8.267, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.1317613643433368e-05, + "loss": 0.6883, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 8.543771692960346e-06, + "loss": 0.6882, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8103070855140686, + "eval_runtime": 6.8667, + "eval_samples_per_second": 525.432, + "eval_steps_per_second": 8.301, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 6.138619728697284e-06, + "loss": 0.6886, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 4.115832713962798e-06, + "loss": 0.692, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8019214272499084, + "eval_runtime": 6.7053, + "eval_samples_per_second": 538.083, + "eval_steps_per_second": 8.501, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.4869116011452495e-06, + "loss": 0.6894, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9223946784922394, + "eval_loss": 0.8019643425941467, + "eval_runtime": 6.8834, + "eval_samples_per_second": 524.158, + "eval_steps_per_second": 8.281, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.2611179407818806e-06, + "loss": 0.684, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 4.454212231962345e-07, + "loss": 0.6869, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9229490022172949, + "eval_loss": 0.7989235520362854, + "eval_runtime": 6.6346, + "eval_samples_per_second": 543.819, + "eval_steps_per_second": 8.591, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 4.445925209380897e-08, + "loss": 0.6909, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9235033259423503, + "eval_loss": 0.7974586486816406, + "eval_runtime": 6.873, + "eval_samples_per_second": 524.955, + "eval_steps_per_second": 8.293, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00014677790143322858, + "metric": "eval/loss", + "warmup_ratio": 0.14047536488599555 + } +} diff --git a/run-cqu1wme3/checkpoint-1260/training_args.bin b/run-cqu1wme3/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9bf0ec3ec23fc862ad5d6ce78232993524bc04fa --- /dev/null +++ b/run-cqu1wme3/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f94da8890f00d84861f6d0d9e291c128b145878ecb0ab058c4ae325d280d2310 +size 4792 diff --git a/run-d5dfkzm5/checkpoint-616/model.safetensors b/run-d5dfkzm5/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f6f05e22b1e89e66de3afad8bb6a808a0bb9fbcf --- /dev/null +++ b/run-d5dfkzm5/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad673486b4bf9c989fe60bf5a4f75a2568a069a2a1ff38f64890777cd296544c +size 198025308 diff --git a/run-d5dfkzm5/checkpoint-616/optimizer.pt b/run-d5dfkzm5/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..42d4455b4b7dcd12702153601e65fdb546c9658d --- /dev/null +++ b/run-d5dfkzm5/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:946b061e1d569f6ed453d6f350b7a0a0de19d2cf9aebb18b52d3732ff5f87c4b +size 395900602 diff --git a/run-d5dfkzm5/checkpoint-616/rng_state.pth b/run-d5dfkzm5/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-d5dfkzm5/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-d5dfkzm5/checkpoint-616/scheduler.pt b/run-d5dfkzm5/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b5a0bb8210ab19a5f87da994abc5f16dd850db0 --- /dev/null +++ b/run-d5dfkzm5/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eb405c5dbb20635a98a5be53e483463b13579a13c129b8a1d521764751c6b45 +size 1064 diff --git a/run-d5dfkzm5/checkpoint-616/trainer_state.json b/run-d5dfkzm5/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..970a229bca6659e9359e081bdeddbf1dbfff0642 --- /dev/null +++ b/run-d5dfkzm5/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9157855128217186, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-d5dfkzm5/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 0.00010497405660086823, + "loss": 1.2695, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.1348450183868408, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.9041, + "eval_samples_per_second": 456.47, + "eval_steps_per_second": 3.669, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00020994811320173645, + "loss": 0.9293, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.0003149221698026047, + "loss": 0.8167, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.883869179600887, + "eval_f1": 0.8828859969975433, + "eval_loss": 0.8560723066329956, + "eval_precision": 0.8825546023914829, + "eval_recall": 0.883869179600887, + "eval_runtime": 8.1915, + "eval_samples_per_second": 440.458, + "eval_steps_per_second": 3.54, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.0004198962264034729, + "loss": 0.7973, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8752771618625277, + "eval_f1": 0.8794460202521345, + "eval_loss": 0.8698550462722778, + "eval_precision": 0.8905892812158781, + "eval_recall": 0.8752771618625277, + "eval_runtime": 8.3823, + "eval_samples_per_second": 430.433, + "eval_steps_per_second": 3.46, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0005248702830043411, + "loss": 0.7912, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.0006298443396052094, + "loss": 0.7822, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6610310421286031, + "eval_f1": 0.7098798507096755, + "eval_loss": 1.1326546669006348, + "eval_precision": 0.8434564575497714, + "eval_recall": 0.6610310421286031, + "eval_runtime": 8.0747, + "eval_samples_per_second": 446.829, + "eval_steps_per_second": 3.591, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0007348183962060775, + "loss": 0.803, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0007665825826477562, + "loss": 0.7829, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8913525498891353, + "eval_f1": 0.8805972313203657, + "eval_loss": 0.8522917628288269, + "eval_precision": 0.8804890141116605, + "eval_recall": 0.8913525498891353, + "eval_runtime": 8.1995, + "eval_samples_per_second": 440.028, + "eval_steps_per_second": 3.537, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0007639218930599402, + "loss": 0.7893, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8802660753880266, + "eval_f1": 0.8650090321797618, + "eval_loss": 0.8883856534957886, + "eval_precision": 0.8622853700623703, + "eval_recall": 0.8802660753880266, + "eval_runtime": 7.8762, + "eval_samples_per_second": 458.088, + "eval_steps_per_second": 3.682, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0007590457325484003, + "loss": 0.787, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0007519825029197643, + "loss": 0.7866, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8777716186252772, + "eval_f1": 0.8676898216307096, + "eval_loss": 0.8574779629707336, + "eval_precision": 0.8759458227523348, + "eval_recall": 0.8777716186252772, + "eval_runtime": 8.3587, + "eval_samples_per_second": 431.644, + "eval_steps_per_second": 3.469, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0007427733448382094, + "loss": 0.7853, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0007314718981965191, + "loss": 0.773, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8483924611973392, + "eval_f1": 0.7975410705564457, + "eval_loss": 0.9413962960243225, + "eval_precision": 0.8410990720259109, + "eval_recall": 0.8483924611973392, + "eval_runtime": 7.9512, + "eval_samples_per_second": 453.766, + "eval_steps_per_second": 3.647, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0007181439896838214, + "loss": 0.7711, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.6926274944567627, + "eval_f1": 0.7464250558773307, + "eval_loss": 1.135392665863037, + "eval_precision": 0.8709253990326553, + "eval_recall": 0.6926274944567627, + "eval_runtime": 8.1764, + "eval_samples_per_second": 441.271, + "eval_steps_per_second": 3.547, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0007028672493698059, + "loss": 0.7823, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0006857306585386761, + "loss": 0.7732, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8813747228381374, + "eval_f1": 0.8669936699222319, + "eval_loss": 0.8585548400878906, + "eval_precision": 0.8788989933127026, + "eval_recall": 0.8813747228381374, + "eval_runtime": 8.309, + "eval_samples_per_second": 434.228, + "eval_steps_per_second": 3.49, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0006668340314065332, + "loss": 0.7524, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8708425720620843, + "eval_f1": 0.8751593707457973, + "eval_loss": 0.8690881729125977, + "eval_precision": 0.8858550210130286, + "eval_recall": 0.8708425720620843, + "eval_runtime": 8.1915, + "eval_samples_per_second": 440.457, + "eval_steps_per_second": 3.54, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0006462874337409932, + "loss": 0.7652, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0006242105417693606, + "loss": 0.7559, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8786031042128604, + "eval_f1": 0.8668474013574211, + "eval_loss": 0.877130925655365, + "eval_precision": 0.8683425004570275, + "eval_recall": 0.8786031042128604, + "eval_runtime": 8.2125, + "eval_samples_per_second": 439.332, + "eval_steps_per_second": 3.531, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0006007319451094783, + "loss": 0.7506, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0005759883977834181, + "loss": 0.7508, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8808203991130821, + "eval_f1": 0.8787970646382295, + "eval_loss": 0.8647807240486145, + "eval_precision": 0.8800472409367005, + "eval_recall": 0.8808203991130821, + "eval_runtime": 8.0208, + "eval_samples_per_second": 449.832, + "eval_steps_per_second": 3.616, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00055012402167658, + "loss": 0.7375, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8968957871396895, + "eval_f1": 0.8841849297244676, + "eval_loss": 0.8476831912994385, + "eval_precision": 0.8832339068482002, + "eval_recall": 0.8968957871396895, + "eval_runtime": 7.7906, + "eval_samples_per_second": 463.12, + "eval_steps_per_second": 3.722, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0005232894670817497, + "loss": 0.7453, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0004956410352176292, + "loss": 0.7404, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9016075388026608, + "eval_f1": 0.8953801524204328, + "eval_loss": 0.8285048007965088, + "eval_precision": 0.8961409144782271, + "eval_recall": 0.9016075388026608, + "eval_runtime": 7.8559, + "eval_samples_per_second": 459.274, + "eval_steps_per_second": 3.692, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0004673397678328449, + "loss": 0.7306, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00043855050919814606, + "loss": 0.7361, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8891352549889135, + "eval_f1": 0.8708947259813121, + "eval_loss": 0.8673096895217896, + "eval_precision": 0.8753090180371028, + "eval_recall": 0.8891352549889135, + "eval_runtime": 7.7805, + "eval_samples_per_second": 463.723, + "eval_steps_per_second": 3.727, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0004094409459503364, + "loss": 0.7258, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.858370288248337, + "eval_f1": 0.8669292820949759, + "eval_loss": 0.8937473893165588, + "eval_precision": 0.8839535061570079, + "eval_recall": 0.858370288248337, + "eval_runtime": 8.1815, + "eval_samples_per_second": 440.996, + "eval_steps_per_second": 3.545, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0003801806303804908, + "loss": 0.7236, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.0003509399928554339, + "loss": 0.7207, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.8906639857373484, + "eval_loss": 0.8336716294288635, + "eval_precision": 0.8908135356533609, + "eval_recall": 0.9024390243902439, + "eval_runtime": 7.3376, + "eval_samples_per_second": 491.717, + "eval_steps_per_second": 3.952, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.0003218893491247583, + "loss": 0.7139, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.0002931979082954435, + "loss": 0.7144, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9074279379157428, + "eval_f1": 0.9035783041513764, + "eval_loss": 0.8186041116714478, + "eval_precision": 0.9018457818068526, + "eval_recall": 0.9074279379157428, + "eval_runtime": 8.0479, + "eval_samples_per_second": 448.318, + "eval_steps_per_second": 3.603, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.00026503278725225466, + "loss": 0.7056, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8980044345898004, + "eval_f1": 0.8944383555040252, + "eval_loss": 0.840421199798584, + "eval_precision": 0.8921125033262503, + "eval_recall": 0.8980044345898004, + "eval_runtime": 7.9671, + "eval_samples_per_second": 452.862, + "eval_steps_per_second": 3.64, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.0002375580372645499, + "loss": 0.7062, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.00021093368844914926, + "loss": 0.7032, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.907658605512251, + "eval_loss": 0.8160489201545715, + "eval_precision": 0.9077101549425725, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.0499, + "eval_samples_per_second": 448.204, + "eval_steps_per_second": 3.603, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.00018531481765490774, + "loss": 0.7023, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9044269404409218, + "eval_loss": 0.8235645294189453, + "eval_precision": 0.9011951061053649, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.0955, + "eval_samples_per_second": 445.682, + "eval_steps_per_second": 3.582, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00016085064519821878, + "loss": 0.7006, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 0.00013768366571062214, + "loss": 0.6986, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9112168034041541, + "eval_loss": 0.8147718906402588, + "eval_precision": 0.9104608840844185, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.9507, + "eval_samples_per_second": 453.798, + "eval_steps_per_second": 3.647, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 0.00011594881816100437, + "loss": 0.6951, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 9.57726998867019e-05, + "loss": 0.6966, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9105551478729389, + "eval_loss": 0.8192415237426758, + "eval_precision": 0.9098168199209872, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.9728, + "eval_samples_per_second": 452.541, + "eval_steps_per_second": 3.637, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 7.727282921147948e-05, + "loss": 0.6931, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9123695735786569, + "eval_loss": 0.8164503574371338, + "eval_precision": 0.9105149091511331, + "eval_recall": 0.9154656319290465, + "eval_runtime": 8.4691, + "eval_samples_per_second": 426.019, + "eval_steps_per_second": 3.424, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 6.0556960945358084e-05, + "loss": 0.6873, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 4.572245875325085e-05, + "loss": 0.695, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9084082229862377, + "eval_loss": 0.8206462264060974, + "eval_precision": 0.9063677392438179, + "eval_recall": 0.9124168514412417, + "eval_runtime": 8.2618, + "eval_samples_per_second": 436.711, + "eval_steps_per_second": 3.51, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 3.285572804812171e-05, + "loss": 0.6917, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 2.2031712711853046e-05, + "loss": 0.6893, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9068072150760188, + "eval_loss": 0.8162545561790466, + "eval_precision": 0.9043807502037643, + "eval_recall": 0.9099223946784922, + "eval_runtime": 7.825, + "eval_samples_per_second": 461.085, + "eval_steps_per_second": 3.706, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.3313458575234895e-05, + "loss": 0.6879, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9126523674717834, + "eval_loss": 0.8112335801124573, + "eval_precision": 0.9110965522314393, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.7306, + "eval_samples_per_second": 466.716, + "eval_steps_per_second": 3.751, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 6.751746199639889e-06, + "loss": 0.6934, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 2.3847950992937473e-06, + "loss": 0.6905, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9157855128217186, + "eval_loss": 0.8093068599700928, + "eval_precision": 0.9151035762840671, + "eval_recall": 0.9187915742793792, + "eval_runtime": 8.1829, + "eval_samples_per_second": 440.918, + "eval_steps_per_second": 3.544, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.16830090820578728, + "learning_rate": 0.0007671181059294216, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-d5dfkzm5/checkpoint-616/training_args.bin b/run-d5dfkzm5/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f225e51c7e0c6a248595b7c54c0455f91ec9f856 --- /dev/null +++ b/run-d5dfkzm5/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:618c54aaa16df17c9ef2644875759663934b38c311ac5bdb1f0cae73d28223b0 +size 4792 diff --git a/run-d5dfkzm5/checkpoint-630/model.safetensors b/run-d5dfkzm5/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f10b102c340fa05145818511528e5a6de78c9a68 --- /dev/null +++ b/run-d5dfkzm5/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff813a1cf8357704510170812b99cdf25e977d27f92820dec655d67e65d5c7a6 +size 198025308 diff --git a/run-d5dfkzm5/checkpoint-630/optimizer.pt b/run-d5dfkzm5/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0de6ad21e9968de1e411ebe708641332c58d0f5d --- /dev/null +++ b/run-d5dfkzm5/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399ae1caf8ddc120b86ca52ab2aec3dccb74d2c9ec400e6af574d5dcee76325e +size 395900602 diff --git a/run-d5dfkzm5/checkpoint-630/rng_state.pth b/run-d5dfkzm5/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-d5dfkzm5/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-d5dfkzm5/checkpoint-630/scheduler.pt b/run-d5dfkzm5/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..665df5a6f6fbdfd88782b22b94be6981e6485407 --- /dev/null +++ b/run-d5dfkzm5/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19e6614bea6cdaf8273581d4b9f0a68d8784c1a65a50435214d09a2f1b6a25fb +size 1064 diff --git a/run-d5dfkzm5/checkpoint-630/trainer_state.json b/run-d5dfkzm5/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6f42146a268c7554ba9f988cff2b05b9acc32043 --- /dev/null +++ b/run-d5dfkzm5/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9157855128217186, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-d5dfkzm5/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 0.00010497405660086823, + "loss": 1.2695, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.1348450183868408, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.9041, + "eval_samples_per_second": 456.47, + "eval_steps_per_second": 3.669, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00020994811320173645, + "loss": 0.9293, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.0003149221698026047, + "loss": 0.8167, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.883869179600887, + "eval_f1": 0.8828859969975433, + "eval_loss": 0.8560723066329956, + "eval_precision": 0.8825546023914829, + "eval_recall": 0.883869179600887, + "eval_runtime": 8.1915, + "eval_samples_per_second": 440.458, + "eval_steps_per_second": 3.54, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.0004198962264034729, + "loss": 0.7973, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8752771618625277, + "eval_f1": 0.8794460202521345, + "eval_loss": 0.8698550462722778, + "eval_precision": 0.8905892812158781, + "eval_recall": 0.8752771618625277, + "eval_runtime": 8.3823, + "eval_samples_per_second": 430.433, + "eval_steps_per_second": 3.46, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0005248702830043411, + "loss": 0.7912, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.0006298443396052094, + "loss": 0.7822, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6610310421286031, + "eval_f1": 0.7098798507096755, + "eval_loss": 1.1326546669006348, + "eval_precision": 0.8434564575497714, + "eval_recall": 0.6610310421286031, + "eval_runtime": 8.0747, + "eval_samples_per_second": 446.829, + "eval_steps_per_second": 3.591, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0007348183962060775, + "loss": 0.803, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0007665825826477562, + "loss": 0.7829, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8913525498891353, + "eval_f1": 0.8805972313203657, + "eval_loss": 0.8522917628288269, + "eval_precision": 0.8804890141116605, + "eval_recall": 0.8913525498891353, + "eval_runtime": 8.1995, + "eval_samples_per_second": 440.028, + "eval_steps_per_second": 3.537, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0007639218930599402, + "loss": 0.7893, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8802660753880266, + "eval_f1": 0.8650090321797618, + "eval_loss": 0.8883856534957886, + "eval_precision": 0.8622853700623703, + "eval_recall": 0.8802660753880266, + "eval_runtime": 7.8762, + "eval_samples_per_second": 458.088, + "eval_steps_per_second": 3.682, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0007590457325484003, + "loss": 0.787, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0007519825029197643, + "loss": 0.7866, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8777716186252772, + "eval_f1": 0.8676898216307096, + "eval_loss": 0.8574779629707336, + "eval_precision": 0.8759458227523348, + "eval_recall": 0.8777716186252772, + "eval_runtime": 8.3587, + "eval_samples_per_second": 431.644, + "eval_steps_per_second": 3.469, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0007427733448382094, + "loss": 0.7853, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0007314718981965191, + "loss": 0.773, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8483924611973392, + "eval_f1": 0.7975410705564457, + "eval_loss": 0.9413962960243225, + "eval_precision": 0.8410990720259109, + "eval_recall": 0.8483924611973392, + "eval_runtime": 7.9512, + "eval_samples_per_second": 453.766, + "eval_steps_per_second": 3.647, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0007181439896838214, + "loss": 0.7711, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.6926274944567627, + "eval_f1": 0.7464250558773307, + "eval_loss": 1.135392665863037, + "eval_precision": 0.8709253990326553, + "eval_recall": 0.6926274944567627, + "eval_runtime": 8.1764, + "eval_samples_per_second": 441.271, + "eval_steps_per_second": 3.547, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0007028672493698059, + "loss": 0.7823, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0006857306585386761, + "loss": 0.7732, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8813747228381374, + "eval_f1": 0.8669936699222319, + "eval_loss": 0.8585548400878906, + "eval_precision": 0.8788989933127026, + "eval_recall": 0.8813747228381374, + "eval_runtime": 8.309, + "eval_samples_per_second": 434.228, + "eval_steps_per_second": 3.49, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0006668340314065332, + "loss": 0.7524, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8708425720620843, + "eval_f1": 0.8751593707457973, + "eval_loss": 0.8690881729125977, + "eval_precision": 0.8858550210130286, + "eval_recall": 0.8708425720620843, + "eval_runtime": 8.1915, + "eval_samples_per_second": 440.457, + "eval_steps_per_second": 3.54, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0006462874337409932, + "loss": 0.7652, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0006242105417693606, + "loss": 0.7559, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8786031042128604, + "eval_f1": 0.8668474013574211, + "eval_loss": 0.877130925655365, + "eval_precision": 0.8683425004570275, + "eval_recall": 0.8786031042128604, + "eval_runtime": 8.2125, + "eval_samples_per_second": 439.332, + "eval_steps_per_second": 3.531, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0006007319451094783, + "loss": 0.7506, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0005759883977834181, + "loss": 0.7508, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8808203991130821, + "eval_f1": 0.8787970646382295, + "eval_loss": 0.8647807240486145, + "eval_precision": 0.8800472409367005, + "eval_recall": 0.8808203991130821, + "eval_runtime": 8.0208, + "eval_samples_per_second": 449.832, + "eval_steps_per_second": 3.616, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00055012402167658, + "loss": 0.7375, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8968957871396895, + "eval_f1": 0.8841849297244676, + "eval_loss": 0.8476831912994385, + "eval_precision": 0.8832339068482002, + "eval_recall": 0.8968957871396895, + "eval_runtime": 7.7906, + "eval_samples_per_second": 463.12, + "eval_steps_per_second": 3.722, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0005232894670817497, + "loss": 0.7453, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0004956410352176292, + "loss": 0.7404, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9016075388026608, + "eval_f1": 0.8953801524204328, + "eval_loss": 0.8285048007965088, + "eval_precision": 0.8961409144782271, + "eval_recall": 0.9016075388026608, + "eval_runtime": 7.8559, + "eval_samples_per_second": 459.274, + "eval_steps_per_second": 3.692, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0004673397678328449, + "loss": 0.7306, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00043855050919814606, + "loss": 0.7361, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8891352549889135, + "eval_f1": 0.8708947259813121, + "eval_loss": 0.8673096895217896, + "eval_precision": 0.8753090180371028, + "eval_recall": 0.8891352549889135, + "eval_runtime": 7.7805, + "eval_samples_per_second": 463.723, + "eval_steps_per_second": 3.727, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0004094409459503364, + "loss": 0.7258, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.858370288248337, + "eval_f1": 0.8669292820949759, + "eval_loss": 0.8937473893165588, + "eval_precision": 0.8839535061570079, + "eval_recall": 0.858370288248337, + "eval_runtime": 8.1815, + "eval_samples_per_second": 440.996, + "eval_steps_per_second": 3.545, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0003801806303804908, + "loss": 0.7236, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.0003509399928554339, + "loss": 0.7207, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.8906639857373484, + "eval_loss": 0.8336716294288635, + "eval_precision": 0.8908135356533609, + "eval_recall": 0.9024390243902439, + "eval_runtime": 7.3376, + "eval_samples_per_second": 491.717, + "eval_steps_per_second": 3.952, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.0003218893491247583, + "loss": 0.7139, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.0002931979082954435, + "loss": 0.7144, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9074279379157428, + "eval_f1": 0.9035783041513764, + "eval_loss": 0.8186041116714478, + "eval_precision": 0.9018457818068526, + "eval_recall": 0.9074279379157428, + "eval_runtime": 8.0479, + "eval_samples_per_second": 448.318, + "eval_steps_per_second": 3.603, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.00026503278725225466, + "loss": 0.7056, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8980044345898004, + "eval_f1": 0.8944383555040252, + "eval_loss": 0.840421199798584, + "eval_precision": 0.8921125033262503, + "eval_recall": 0.8980044345898004, + "eval_runtime": 7.9671, + "eval_samples_per_second": 452.862, + "eval_steps_per_second": 3.64, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.0002375580372645499, + "loss": 0.7062, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.00021093368844914926, + "loss": 0.7032, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.907658605512251, + "eval_loss": 0.8160489201545715, + "eval_precision": 0.9077101549425725, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.0499, + "eval_samples_per_second": 448.204, + "eval_steps_per_second": 3.603, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.00018531481765490774, + "loss": 0.7023, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9044269404409218, + "eval_loss": 0.8235645294189453, + "eval_precision": 0.9011951061053649, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.0955, + "eval_samples_per_second": 445.682, + "eval_steps_per_second": 3.582, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00016085064519821878, + "loss": 0.7006, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 0.00013768366571062214, + "loss": 0.6986, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9112168034041541, + "eval_loss": 0.8147718906402588, + "eval_precision": 0.9104608840844185, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.9507, + "eval_samples_per_second": 453.798, + "eval_steps_per_second": 3.647, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 0.00011594881816100437, + "loss": 0.6951, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 9.57726998867019e-05, + "loss": 0.6966, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9105551478729389, + "eval_loss": 0.8192415237426758, + "eval_precision": 0.9098168199209872, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.9728, + "eval_samples_per_second": 452.541, + "eval_steps_per_second": 3.637, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 7.727282921147948e-05, + "loss": 0.6931, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9123695735786569, + "eval_loss": 0.8164503574371338, + "eval_precision": 0.9105149091511331, + "eval_recall": 0.9154656319290465, + "eval_runtime": 8.4691, + "eval_samples_per_second": 426.019, + "eval_steps_per_second": 3.424, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 6.0556960945358084e-05, + "loss": 0.6873, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 4.572245875325085e-05, + "loss": 0.695, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9084082229862377, + "eval_loss": 0.8206462264060974, + "eval_precision": 0.9063677392438179, + "eval_recall": 0.9124168514412417, + "eval_runtime": 8.2618, + "eval_samples_per_second": 436.711, + "eval_steps_per_second": 3.51, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 3.285572804812171e-05, + "loss": 0.6917, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 2.2031712711853046e-05, + "loss": 0.6893, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9068072150760188, + "eval_loss": 0.8162545561790466, + "eval_precision": 0.9043807502037643, + "eval_recall": 0.9099223946784922, + "eval_runtime": 7.825, + "eval_samples_per_second": 461.085, + "eval_steps_per_second": 3.706, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.3313458575234895e-05, + "loss": 0.6879, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9126523674717834, + "eval_loss": 0.8112335801124573, + "eval_precision": 0.9110965522314393, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.7306, + "eval_samples_per_second": 466.716, + "eval_steps_per_second": 3.751, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 6.751746199639889e-06, + "loss": 0.6934, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 2.3847950992937473e-06, + "loss": 0.6905, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9157855128217186, + "eval_loss": 0.8093068599700928, + "eval_precision": 0.9151035762840671, + "eval_recall": 0.9187915742793792, + "eval_runtime": 8.1829, + "eval_samples_per_second": 440.918, + "eval_steps_per_second": 3.544, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 2.3804112693576326e-07, + "loss": 0.6856, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9069675434335235, + "eval_loss": 0.8146656155586243, + "eval_precision": 0.9055539407941134, + "eval_recall": 0.9110310421286031, + "eval_runtime": 7.6673, + "eval_samples_per_second": 470.569, + "eval_steps_per_second": 3.782, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.16830090820578728, + "learning_rate": 0.0007671181059294216, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-d5dfkzm5/checkpoint-630/training_args.bin b/run-d5dfkzm5/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f225e51c7e0c6a248595b7c54c0455f91ec9f856 --- /dev/null +++ b/run-d5dfkzm5/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:618c54aaa16df17c9ef2644875759663934b38c311ac5bdb1f0cae73d28223b0 +size 4792 diff --git a/run-e8q38n7o/checkpoint-1232/model.safetensors b/run-e8q38n7o/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32c1c7b3da8a52db20cd27c6f36891d06fe37d29 --- /dev/null +++ b/run-e8q38n7o/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af6dd3fe506074ecfea1582d148daa498df167b331cc43aaf178a54d69cd2174 +size 198025308 diff --git a/run-e8q38n7o/checkpoint-1232/optimizer.pt b/run-e8q38n7o/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd09213c621bad69fcee346b2fb9bfa214c9fbaf --- /dev/null +++ b/run-e8q38n7o/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e512a8d77d69dcba46e393bc0f3e8b907b28f2ec4c18bfd08d54c079072c14 +size 395900602 diff --git a/run-e8q38n7o/checkpoint-1232/rng_state.pth b/run-e8q38n7o/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-e8q38n7o/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-e8q38n7o/checkpoint-1232/scheduler.pt b/run-e8q38n7o/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..66c30d7a9f5aaf0faf4236a56e028959c7e4f840 --- /dev/null +++ b/run-e8q38n7o/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48bc94527e2cf9fd3b5244a735aa207e1c37ef6b9c3a4ead85ec35888bfb86d8 +size 1064 diff --git a/run-e8q38n7o/checkpoint-1232/trainer_state.json b/run-e8q38n7o/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9168879ca6b6391b4d2320bc2dfcbf8132e1182d --- /dev/null +++ b/run-e8q38n7o/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9221175166297118, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-e8q38n7o/checkpoint-1147", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.644646911700437e-05, + "loss": 1.4051, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8287139689578714, + "eval_loss": 0.9463421106338501, + "eval_runtime": 6.8289, + "eval_samples_per_second": 528.344, + "eval_steps_per_second": 8.347, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 3.289293823400874e-05, + "loss": 0.9822, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 4.93394073510131e-05, + "loss": 0.8747, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9024390243902439, + "eval_loss": 0.8347688913345337, + "eval_runtime": 6.8721, + "eval_samples_per_second": 525.02, + "eval_steps_per_second": 8.294, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 6.578587646801747e-05, + "loss": 0.8246, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8076615929603577, + "eval_runtime": 6.9238, + "eval_samples_per_second": 521.104, + "eval_steps_per_second": 8.233, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 8.223234558502183e-05, + "loss": 0.8018, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 9.86788147020262e-05, + "loss": 0.7892, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.818993866443634, + "eval_runtime": 6.679, + "eval_samples_per_second": 540.202, + "eval_steps_per_second": 8.534, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00011512528381903057, + "loss": 0.7802, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00013157175293603495, + "loss": 0.7733, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9210088691796009, + "eval_loss": 0.8003050088882446, + "eval_runtime": 7.0374, + "eval_samples_per_second": 512.691, + "eval_steps_per_second": 8.1, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00014801822205303932, + "loss": 0.7709, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8131498694419861, + "eval_runtime": 7.2098, + "eval_samples_per_second": 500.433, + "eval_steps_per_second": 7.906, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00016446469117004366, + "loss": 0.764, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00018091116028704803, + "loss": 0.7564, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8200504779815674, + "eval_runtime": 7.0049, + "eval_samples_per_second": 515.065, + "eval_steps_per_second": 8.137, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0001973576294040524, + "loss": 0.7572, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00021380409852105677, + "loss": 0.7522, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9082594235033259, + "eval_loss": 0.821529746055603, + "eval_runtime": 6.5866, + "eval_samples_per_second": 547.779, + "eval_steps_per_second": 8.654, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00023025056763806113, + "loss": 0.7439, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.88470066518847, + "eval_loss": 0.8570214509963989, + "eval_runtime": 6.6323, + "eval_samples_per_second": 544.006, + "eval_steps_per_second": 8.594, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0002466970367550655, + "loss": 0.7603, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0002631435058720699, + "loss": 0.7515, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8902439024390244, + "eval_loss": 0.855618417263031, + "eval_runtime": 6.7453, + "eval_samples_per_second": 534.889, + "eval_steps_per_second": 8.45, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0002652243349274603, + "loss": 0.7495, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8248185515403748, + "eval_runtime": 6.8386, + "eval_samples_per_second": 527.596, + "eval_steps_per_second": 8.335, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0002635389888050651, + "loss": 0.7439, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00026061875267900696, + "loss": 0.7359, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8300149440765381, + "eval_runtime": 6.79, + "eval_samples_per_second": 531.371, + "eval_steps_per_second": 8.395, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0002564912173054379, + "loss": 0.7376, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0002511953801547973, + "loss": 0.7334, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9004988913525499, + "eval_loss": 0.8349573016166687, + "eval_runtime": 6.84, + "eval_samples_per_second": 527.482, + "eval_steps_per_second": 8.333, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0002447812769588335, + "loss": 0.731, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8960643015521065, + "eval_loss": 0.8385207056999207, + "eval_runtime": 6.8958, + "eval_samples_per_second": 523.214, + "eval_steps_per_second": 8.266, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0002373095089667521, + "loss": 0.7338, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0002288506703770345, + "loss": 0.7224, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8741685144124168, + "eval_loss": 0.874174952507019, + "eval_runtime": 6.9389, + "eval_samples_per_second": 519.969, + "eval_steps_per_second": 8.215, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00021948468135462115, + "loss": 0.7254, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00020930003293519767, + "loss": 0.7241, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8988359201773836, + "eval_loss": 0.8423943519592285, + "eval_runtime": 6.9721, + "eval_samples_per_second": 517.494, + "eval_steps_per_second": 8.175, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00019839295095082214, + "loss": 0.7224, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9082594235033259, + "eval_loss": 0.8282972574234009, + "eval_runtime": 6.7228, + "eval_samples_per_second": 536.684, + "eval_steps_per_second": 8.479, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00018686648687622903, + "loss": 0.7124, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00017482954418560694, + "loss": 0.713, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8070340156555176, + "eval_runtime": 6.7399, + "eval_samples_per_second": 535.323, + "eval_steps_per_second": 8.457, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00016239584941895425, + "loss": 0.7088, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00014968287767950546, + "loss": 0.706, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9099223946784922, + "eval_loss": 0.8211929798126221, + "eval_runtime": 6.8048, + "eval_samples_per_second": 530.217, + "eval_steps_per_second": 8.376, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0001368107427142629, + "loss": 0.7101, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9118625277161863, + "eval_loss": 0.8162717223167419, + "eval_runtime": 6.4633, + "eval_samples_per_second": 558.226, + "eval_steps_per_second": 8.819, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00012390106206429293, + "loss": 0.7022, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00011107580800698696, + "loss": 0.7029, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.813727617263794, + "eval_runtime": 6.9495, + "eval_samples_per_second": 519.172, + "eval_steps_per_second": 8.202, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 9.845615514672967e-05, + "loss": 0.6964, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.8221824765205383, + "eval_runtime": 6.6943, + "eval_samples_per_second": 538.967, + "eval_steps_per_second": 8.515, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 8.616133554208159e-05, + "loss": 0.6988, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 7.430751218637738e-05, + "loss": 0.697, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8089330196380615, + "eval_runtime": 6.886, + "eval_samples_per_second": 523.965, + "eval_steps_per_second": 8.278, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 6.300668148523646e-05, + "loss": 0.6941, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 5.236561510051525e-05, + "loss": 0.6927, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.8071280717849731, + "eval_runtime": 7.167, + "eval_samples_per_second": 503.42, + "eval_steps_per_second": 7.953, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 4.2484851158291996e-05, + "loss": 0.6888, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8069462180137634, + "eval_runtime": 6.881, + "eval_samples_per_second": 524.339, + "eval_steps_per_second": 8.284, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 3.345774435207758e-05, + "loss": 0.6894, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.5369583915995038e-05, + "loss": 0.6868, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8122897744178772, + "eval_runtime": 6.7639, + "eval_samples_per_second": 533.423, + "eval_steps_per_second": 8.427, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 1.82967878014286e-05, + "loss": 0.6883, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.23061806706616e-05, + "loss": 0.6906, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9221175166297118, + "eval_loss": 0.801467776298523, + "eval_runtime": 6.8146, + "eval_samples_per_second": 529.454, + "eval_steps_per_second": 8.364, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 7.4543625291092e-06, + "loss": 0.6891, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8042111396789551, + "eval_runtime": 6.6914, + "eval_samples_per_second": 539.202, + "eval_steps_per_second": 8.518, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 3.787173961388448e-06, + "loss": 0.6816, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.339263023741539e-06, + "loss": 0.6849, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8104687929153442, + "eval_runtime": 6.6312, + "eval_samples_per_second": 544.092, + "eval_steps_per_second": 8.596, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00026567373189007054, + "metric": "eval/loss", + "warmup_ratio": 0.3330468637782364 + } +} diff --git a/run-e8q38n7o/checkpoint-1232/training_args.bin b/run-e8q38n7o/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..26f0e527872f07c69a1107a4696e32ea0c6400b0 --- /dev/null +++ b/run-e8q38n7o/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:252242322f29c2f30e6dd776efa31cd7d658afce8f43fcc7cac9845fdf28f3e8 +size 4792 diff --git a/run-e8q38n7o/checkpoint-1260/model.safetensors b/run-e8q38n7o/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4e434acf57b6a47c31e4e09ca8aa4d88372bae5 --- /dev/null +++ b/run-e8q38n7o/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8148b3df56c172051f3da0e407f728edbe5928ae8b3bf185623f39b8c2acc8a +size 198025308 diff --git a/run-e8q38n7o/checkpoint-1260/optimizer.pt b/run-e8q38n7o/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fdadf1021f4244db0c49a633a7fa106211dfc3a1 --- /dev/null +++ b/run-e8q38n7o/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdee2730079bf732c39c3f9e15f059905c4a99a7e369fdefbc00254c6d65e544 +size 395900602 diff --git a/run-e8q38n7o/checkpoint-1260/rng_state.pth b/run-e8q38n7o/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-e8q38n7o/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-e8q38n7o/checkpoint-1260/scheduler.pt b/run-e8q38n7o/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e983b195675df9747f7938bfbf244163ee6ee08b --- /dev/null +++ b/run-e8q38n7o/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22733f2dfbc81e0b234d4767604d99933613423fd952617b0c57a33184ec11a7 +size 1064 diff --git a/run-e8q38n7o/checkpoint-1260/trainer_state.json b/run-e8q38n7o/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7332814f3b4de8c91091edf5f4064ac51d194a88 --- /dev/null +++ b/run-e8q38n7o/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9246119733924612, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-e8q38n7o/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.644646911700437e-05, + "loss": 1.4051, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8287139689578714, + "eval_loss": 0.9463421106338501, + "eval_runtime": 6.8289, + "eval_samples_per_second": 528.344, + "eval_steps_per_second": 8.347, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 3.289293823400874e-05, + "loss": 0.9822, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 4.93394073510131e-05, + "loss": 0.8747, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9024390243902439, + "eval_loss": 0.8347688913345337, + "eval_runtime": 6.8721, + "eval_samples_per_second": 525.02, + "eval_steps_per_second": 8.294, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 6.578587646801747e-05, + "loss": 0.8246, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8076615929603577, + "eval_runtime": 6.9238, + "eval_samples_per_second": 521.104, + "eval_steps_per_second": 8.233, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 8.223234558502183e-05, + "loss": 0.8018, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 9.86788147020262e-05, + "loss": 0.7892, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.818993866443634, + "eval_runtime": 6.679, + "eval_samples_per_second": 540.202, + "eval_steps_per_second": 8.534, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00011512528381903057, + "loss": 0.7802, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00013157175293603495, + "loss": 0.7733, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9210088691796009, + "eval_loss": 0.8003050088882446, + "eval_runtime": 7.0374, + "eval_samples_per_second": 512.691, + "eval_steps_per_second": 8.1, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00014801822205303932, + "loss": 0.7709, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8131498694419861, + "eval_runtime": 7.2098, + "eval_samples_per_second": 500.433, + "eval_steps_per_second": 7.906, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00016446469117004366, + "loss": 0.764, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00018091116028704803, + "loss": 0.7564, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8200504779815674, + "eval_runtime": 7.0049, + "eval_samples_per_second": 515.065, + "eval_steps_per_second": 8.137, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0001973576294040524, + "loss": 0.7572, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00021380409852105677, + "loss": 0.7522, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9082594235033259, + "eval_loss": 0.821529746055603, + "eval_runtime": 6.5866, + "eval_samples_per_second": 547.779, + "eval_steps_per_second": 8.654, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00023025056763806113, + "loss": 0.7439, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.88470066518847, + "eval_loss": 0.8570214509963989, + "eval_runtime": 6.6323, + "eval_samples_per_second": 544.006, + "eval_steps_per_second": 8.594, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0002466970367550655, + "loss": 0.7603, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0002631435058720699, + "loss": 0.7515, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8902439024390244, + "eval_loss": 0.855618417263031, + "eval_runtime": 6.7453, + "eval_samples_per_second": 534.889, + "eval_steps_per_second": 8.45, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0002652243349274603, + "loss": 0.7495, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8248185515403748, + "eval_runtime": 6.8386, + "eval_samples_per_second": 527.596, + "eval_steps_per_second": 8.335, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0002635389888050651, + "loss": 0.7439, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00026061875267900696, + "loss": 0.7359, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8300149440765381, + "eval_runtime": 6.79, + "eval_samples_per_second": 531.371, + "eval_steps_per_second": 8.395, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0002564912173054379, + "loss": 0.7376, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0002511953801547973, + "loss": 0.7334, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9004988913525499, + "eval_loss": 0.8349573016166687, + "eval_runtime": 6.84, + "eval_samples_per_second": 527.482, + "eval_steps_per_second": 8.333, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0002447812769588335, + "loss": 0.731, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8960643015521065, + "eval_loss": 0.8385207056999207, + "eval_runtime": 6.8958, + "eval_samples_per_second": 523.214, + "eval_steps_per_second": 8.266, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0002373095089667521, + "loss": 0.7338, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0002288506703770345, + "loss": 0.7224, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8741685144124168, + "eval_loss": 0.874174952507019, + "eval_runtime": 6.9389, + "eval_samples_per_second": 519.969, + "eval_steps_per_second": 8.215, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00021948468135462115, + "loss": 0.7254, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00020930003293519767, + "loss": 0.7241, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8988359201773836, + "eval_loss": 0.8423943519592285, + "eval_runtime": 6.9721, + "eval_samples_per_second": 517.494, + "eval_steps_per_second": 8.175, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00019839295095082214, + "loss": 0.7224, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9082594235033259, + "eval_loss": 0.8282972574234009, + "eval_runtime": 6.7228, + "eval_samples_per_second": 536.684, + "eval_steps_per_second": 8.479, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00018686648687622903, + "loss": 0.7124, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00017482954418560694, + "loss": 0.713, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8070340156555176, + "eval_runtime": 6.7399, + "eval_samples_per_second": 535.323, + "eval_steps_per_second": 8.457, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00016239584941895425, + "loss": 0.7088, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00014968287767950546, + "loss": 0.706, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9099223946784922, + "eval_loss": 0.8211929798126221, + "eval_runtime": 6.8048, + "eval_samples_per_second": 530.217, + "eval_steps_per_second": 8.376, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0001368107427142629, + "loss": 0.7101, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9118625277161863, + "eval_loss": 0.8162717223167419, + "eval_runtime": 6.4633, + "eval_samples_per_second": 558.226, + "eval_steps_per_second": 8.819, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00012390106206429293, + "loss": 0.7022, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00011107580800698696, + "loss": 0.7029, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.813727617263794, + "eval_runtime": 6.9495, + "eval_samples_per_second": 519.172, + "eval_steps_per_second": 8.202, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 9.845615514672967e-05, + "loss": 0.6964, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.8221824765205383, + "eval_runtime": 6.6943, + "eval_samples_per_second": 538.967, + "eval_steps_per_second": 8.515, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 8.616133554208159e-05, + "loss": 0.6988, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 7.430751218637738e-05, + "loss": 0.697, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8089330196380615, + "eval_runtime": 6.886, + "eval_samples_per_second": 523.965, + "eval_steps_per_second": 8.278, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 6.300668148523646e-05, + "loss": 0.6941, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 5.236561510051525e-05, + "loss": 0.6927, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.8071280717849731, + "eval_runtime": 7.167, + "eval_samples_per_second": 503.42, + "eval_steps_per_second": 7.953, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 4.2484851158291996e-05, + "loss": 0.6888, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8069462180137634, + "eval_runtime": 6.881, + "eval_samples_per_second": 524.339, + "eval_steps_per_second": 8.284, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 3.345774435207758e-05, + "loss": 0.6894, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.5369583915995038e-05, + "loss": 0.6868, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8122897744178772, + "eval_runtime": 6.7639, + "eval_samples_per_second": 533.423, + "eval_steps_per_second": 8.427, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 1.82967878014286e-05, + "loss": 0.6883, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.23061806706616e-05, + "loss": 0.6906, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9221175166297118, + "eval_loss": 0.801467776298523, + "eval_runtime": 6.8146, + "eval_samples_per_second": 529.454, + "eval_steps_per_second": 8.364, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 7.4543625291092e-06, + "loss": 0.6891, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8042111396789551, + "eval_runtime": 6.6914, + "eval_samples_per_second": 539.202, + "eval_steps_per_second": 8.518, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 3.787173961388448e-06, + "loss": 0.6816, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.339263023741539e-06, + "loss": 0.6849, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8104687929153442, + "eval_runtime": 6.6312, + "eval_samples_per_second": 544.092, + "eval_steps_per_second": 8.596, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 1.337578848669646e-07, + "loss": 0.6887, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9246119733924612, + "eval_loss": 0.7983529567718506, + "eval_runtime": 7.0596, + "eval_samples_per_second": 511.074, + "eval_steps_per_second": 8.074, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00026567373189007054, + "metric": "eval/loss", + "warmup_ratio": 0.3330468637782364 + } +} diff --git a/run-e8q38n7o/checkpoint-1260/training_args.bin b/run-e8q38n7o/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..26f0e527872f07c69a1107a4696e32ea0c6400b0 --- /dev/null +++ b/run-e8q38n7o/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:252242322f29c2f30e6dd776efa31cd7d658afce8f43fcc7cac9845fdf28f3e8 +size 4792 diff --git a/run-fx97n43d/checkpoint-488/model.safetensors b/run-fx97n43d/checkpoint-488/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae00a5a0ad4e2b0b6a433d121a01588f1fd814b2 --- /dev/null +++ b/run-fx97n43d/checkpoint-488/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03834d041e1ca5d23ffd7dba15b663a8b744e0cedc74f8b2720869e51512b95e +size 198025308 diff --git a/run-fx97n43d/checkpoint-488/optimizer.pt b/run-fx97n43d/checkpoint-488/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..018b43118c6e0a8e9a587087f5a87d4da2f0fddb --- /dev/null +++ b/run-fx97n43d/checkpoint-488/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77cd413f4ea4a0675e83eec3e91f8afbd345940d06b1302ff72d731c75d4a6be +size 395900602 diff --git a/run-fx97n43d/checkpoint-488/rng_state.pth b/run-fx97n43d/checkpoint-488/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f4e242cd45cd82eec2c9f5f7a343995f3fc07291 --- /dev/null +++ b/run-fx97n43d/checkpoint-488/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a77e9678cdc4e4bb78c6028d260e7fc1b894ddf6c0f054c0c0d7c6e8bb4722d +size 14244 diff --git a/run-fx97n43d/checkpoint-488/scheduler.pt b/run-fx97n43d/checkpoint-488/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2e141ee68fdf183d4a340f88fe3ac855cabfae5 --- /dev/null +++ b/run-fx97n43d/checkpoint-488/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a221f8e32d9dc97d85723e8032cca5277fa7123657c9ddfaa400a48eb89b79ab +size 1064 diff --git a/run-fx97n43d/checkpoint-488/trainer_state.json b/run-fx97n43d/checkpoint-488/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..93932483a579859d3e8c619405c0248f594d8f35 --- /dev/null +++ b/run-fx97n43d/checkpoint-488/trainer_state.json @@ -0,0 +1,526 @@ +{ + "best_metric": 0.9210770013650191, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-fx97n43d/checkpoint-488", + "epoch": 22.96470588235294, + "eval_steps": 500, + "global_step": 488, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.5510039521887183e-05, + "loss": 1.478, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.7513704434640557, + "eval_loss": 1.0341465473175049, + "eval_precision": 0.7699235349840203, + "eval_recall": 0.8287139689578714, + "eval_runtime": 8.3797, + "eval_samples_per_second": 430.565, + "eval_steps_per_second": 3.461, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.1020079043774365e-05, + "loss": 1.1162, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 4.6530118565661544e-05, + "loss": 0.9424, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8705654101995566, + "eval_f1": 0.8389699621462066, + "eval_loss": 0.9252289533615112, + "eval_precision": 0.8517874429530109, + "eval_recall": 0.8705654101995566, + "eval_runtime": 7.8428, + "eval_samples_per_second": 460.041, + "eval_steps_per_second": 3.698, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 6.204015808754873e-05, + "loss": 0.8696, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.8946612312334215, + "eval_loss": 0.9877755045890808, + "eval_precision": 0.8961631624753195, + "eval_recall": 0.9068736141906873, + "eval_runtime": 7.579, + "eval_samples_per_second": 476.05, + "eval_steps_per_second": 3.826, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 7.755019760943591e-05, + "loss": 0.8273, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 9.306023713132309e-05, + "loss": 0.7988, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9080269220857267, + "eval_loss": 0.8059695363044739, + "eval_precision": 0.9037754375795349, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.9757, + "eval_samples_per_second": 452.374, + "eval_steps_per_second": 3.636, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00010857027665321027, + "loss": 0.798, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00011326347231548932, + "loss": 0.774, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9080705370958632, + "eval_loss": 0.8073797225952148, + "eval_precision": 0.9073476317414986, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.2483, + "eval_samples_per_second": 437.426, + "eval_steps_per_second": 3.516, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011287035231995169, + "loss": 0.7733, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9128434775291835, + "eval_loss": 0.8052764534950256, + "eval_precision": 0.910962080004432, + "eval_recall": 0.9154656319290465, + "eval_runtime": 8.2107, + "eval_samples_per_second": 439.429, + "eval_steps_per_second": 3.532, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00011214989390672101, + "loss": 0.7647, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001111062934759132, + "loss": 0.7583, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9138869052547461, + "eval_loss": 0.7971659898757935, + "eval_precision": 0.9104077481882505, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.619, + "eval_samples_per_second": 473.555, + "eval_steps_per_second": 3.806, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00010974562960873211, + "loss": 0.7547, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00010807582766201317, + "loss": 0.7439, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9108299991634333, + "eval_loss": 0.8041123151779175, + "eval_precision": 0.907292999433662, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.8549, + "eval_samples_per_second": 459.332, + "eval_steps_per_second": 3.692, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00010610661360599156, + "loss": 0.738, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9113645109824469, + "eval_loss": 0.7966068387031555, + "eval_precision": 0.9076893913635398, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.6442, + "eval_samples_per_second": 471.993, + "eval_steps_per_second": 3.794, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00010384945737417238, + "loss": 0.7397, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00010131750605526875, + "loss": 0.735, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9121076710952039, + "eval_loss": 0.804332435131073, + "eval_precision": 0.9097350024636609, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.136, + "eval_samples_per_second": 443.464, + "eval_steps_per_second": 3.564, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 9.85255073163396e-05, + "loss": 0.7292, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.901414211525136, + "eval_loss": 0.8244433999061584, + "eval_precision": 0.906232190945862, + "eval_recall": 0.8991130820399114, + "eval_runtime": 8.1006, + "eval_samples_per_second": 445.397, + "eval_steps_per_second": 3.58, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 9.548972350315878e-05, + "loss": 0.7325, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 9.222783691814877e-05, + "loss": 0.7236, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.893569844789357, + "eval_f1": 0.8989946911162718, + "eval_loss": 0.8460181951522827, + "eval_precision": 0.9100742788902317, + "eval_recall": 0.893569844789357, + "eval_runtime": 7.883, + "eval_samples_per_second": 457.695, + "eval_steps_per_second": 3.679, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 8.875884682759899e-05, + "loss": 0.7165, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 8.510295879806365e-05, + "loss": 0.7154, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9074187854075062, + "eval_loss": 0.8101195096969604, + "eval_precision": 0.9037000363072012, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.9381, + "eval_samples_per_second": 454.518, + "eval_steps_per_second": 3.653, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 8.12814670065128e-05, + "loss": 0.7157, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9064691154662861, + "eval_loss": 0.8145021200180054, + "eval_precision": 0.9073091371333267, + "eval_recall": 0.914079822616408, + "eval_runtime": 7.9076, + "eval_samples_per_second": 456.272, + "eval_steps_per_second": 3.667, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 7.731663020973596e-05, + "loss": 0.7141, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 7.323154209542975e-05, + "loss": 0.713, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.904905291376415, + "eval_loss": 0.8159310817718506, + "eval_precision": 0.9062799076312693, + "eval_recall": 0.914079822616408, + "eval_runtime": 7.2278, + "eval_samples_per_second": 499.183, + "eval_steps_per_second": 4.012, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 6.904999677012627e-05, + "loss": 0.7102, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 6.479635016744447e-05, + "loss": 0.7138, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9123090321473447, + "eval_loss": 0.8077041506767273, + "eval_precision": 0.9126418966766847, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.5812, + "eval_samples_per_second": 475.917, + "eval_steps_per_second": 3.825, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 6.0495378183908996e-05, + "loss": 0.7031, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9172330986693904, + "eval_loss": 0.8018594980239868, + "eval_precision": 0.914982087273544, + "eval_recall": 0.9210088691796009, + "eval_runtime": 7.7209, + "eval_samples_per_second": 467.301, + "eval_steps_per_second": 3.756, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 5.617213236864303e-05, + "loss": 0.7066, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 5.185179400748782e-05, + "loss": 0.7025, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9121013658355167, + "eval_loss": 0.8063293099403381, + "eval_precision": 0.9101548570689432, + "eval_recall": 0.9157427937915743, + "eval_runtime": 7.6756, + "eval_samples_per_second": 470.059, + "eval_steps_per_second": 3.778, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 4.7559527451454613e-05, + "loss": 0.7023, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 4.332033354381553e-05, + "loss": 0.7039, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9110959153728131, + "eval_loss": 0.8079176545143127, + "eval_precision": 0.9076920456818248, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.7227, + "eval_samples_per_second": 467.196, + "eval_steps_per_second": 3.755, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 3.915890399956582e-05, + "loss": 0.7007, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8999445676274944, + "eval_f1": 0.9018109542637969, + "eval_loss": 0.8286749720573425, + "eval_precision": 0.9064557605321089, + "eval_recall": 0.8999445676274944, + "eval_runtime": 7.6911, + "eval_samples_per_second": 469.114, + "eval_steps_per_second": 3.771, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 3.509947758544222e-05, + "loss": 0.7008, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.1165698938195404e-05, + "loss": 0.7013, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9160103903036274, + "eval_loss": 0.8033446669578552, + "eval_precision": 0.9157986272829293, + "eval_recall": 0.9196230598669624, + "eval_runtime": 7.6705, + "eval_samples_per_second": 470.371, + "eval_steps_per_second": 3.781, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 2.738048084344645e-05, + "loss": 0.698, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9105505264344369, + "eval_loss": 0.8064530491828918, + "eval_precision": 0.9079365511507042, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.8595, + "eval_samples_per_second": 459.061, + "eval_steps_per_second": 3.69, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.3765870777302054e-05, + "loss": 0.6984, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.0342922488073023e-05, + "loss": 0.6962, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9254434589800443, + "eval_f1": 0.9210770013650191, + "eval_loss": 0.7944092154502869, + "eval_precision": 0.9202816675752099, + "eval_recall": 0.9254434589800443, + "eval_runtime": 7.8528, + "eval_samples_per_second": 459.456, + "eval_steps_per_second": 3.693, + "step": 488 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.18565388367817143, + "learning_rate": 0.00011334259650609864, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-fx97n43d/checkpoint-488/training_args.bin b/run-fx97n43d/checkpoint-488/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c671d6ed3c01c6edf501a4db0db79383a7a87747 --- /dev/null +++ b/run-fx97n43d/checkpoint-488/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9e4005403bfccbf15c4be41c1453abc37a8759b871629046a645f8884fab9c5 +size 4792 diff --git a/run-fx97n43d/checkpoint-630/model.safetensors b/run-fx97n43d/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2327b770c28df0aa458440797746ec47d31e1c3d --- /dev/null +++ b/run-fx97n43d/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:314d7fd3646ba4962c0ebb5537d36c0e7635cd15c4800689d903608029d7f1aa +size 198025308 diff --git a/run-fx97n43d/checkpoint-630/optimizer.pt b/run-fx97n43d/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e4bb6e84803f38d68d690dfcc88b9b3d48b5254 --- /dev/null +++ b/run-fx97n43d/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ecf309ddc5ff7c7a30022dff86c13f692d14256070459bd5c3afd05a9798819 +size 395900602 diff --git a/run-fx97n43d/checkpoint-630/rng_state.pth b/run-fx97n43d/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-fx97n43d/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-fx97n43d/checkpoint-630/scheduler.pt b/run-fx97n43d/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..910059537b175dba47e17085b578b38f08d66310 --- /dev/null +++ b/run-fx97n43d/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32a8bc13168faf8cd7ace7a8d20b67406aff01604f28e73f1aa198ea427f6542 +size 1064 diff --git a/run-fx97n43d/checkpoint-630/trainer_state.json b/run-fx97n43d/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fc2bd76438a1d86eaf6670d585d776f15e8a9e4b --- /dev/null +++ b/run-fx97n43d/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9210770013650191, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-fx97n43d/checkpoint-488", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.5510039521887183e-05, + "loss": 1.478, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.7513704434640557, + "eval_loss": 1.0341465473175049, + "eval_precision": 0.7699235349840203, + "eval_recall": 0.8287139689578714, + "eval_runtime": 8.3797, + "eval_samples_per_second": 430.565, + "eval_steps_per_second": 3.461, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.1020079043774365e-05, + "loss": 1.1162, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 4.6530118565661544e-05, + "loss": 0.9424, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8705654101995566, + "eval_f1": 0.8389699621462066, + "eval_loss": 0.9252289533615112, + "eval_precision": 0.8517874429530109, + "eval_recall": 0.8705654101995566, + "eval_runtime": 7.8428, + "eval_samples_per_second": 460.041, + "eval_steps_per_second": 3.698, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 6.204015808754873e-05, + "loss": 0.8696, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.8946612312334215, + "eval_loss": 0.9877755045890808, + "eval_precision": 0.8961631624753195, + "eval_recall": 0.9068736141906873, + "eval_runtime": 7.579, + "eval_samples_per_second": 476.05, + "eval_steps_per_second": 3.826, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 7.755019760943591e-05, + "loss": 0.8273, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 9.306023713132309e-05, + "loss": 0.7988, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9080269220857267, + "eval_loss": 0.8059695363044739, + "eval_precision": 0.9037754375795349, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.9757, + "eval_samples_per_second": 452.374, + "eval_steps_per_second": 3.636, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00010857027665321027, + "loss": 0.798, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00011326347231548932, + "loss": 0.774, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9080705370958632, + "eval_loss": 0.8073797225952148, + "eval_precision": 0.9073476317414986, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.2483, + "eval_samples_per_second": 437.426, + "eval_steps_per_second": 3.516, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011287035231995169, + "loss": 0.7733, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9128434775291835, + "eval_loss": 0.8052764534950256, + "eval_precision": 0.910962080004432, + "eval_recall": 0.9154656319290465, + "eval_runtime": 8.2107, + "eval_samples_per_second": 439.429, + "eval_steps_per_second": 3.532, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00011214989390672101, + "loss": 0.7647, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001111062934759132, + "loss": 0.7583, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9138869052547461, + "eval_loss": 0.7971659898757935, + "eval_precision": 0.9104077481882505, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.619, + "eval_samples_per_second": 473.555, + "eval_steps_per_second": 3.806, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00010974562960873211, + "loss": 0.7547, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00010807582766201317, + "loss": 0.7439, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9108299991634333, + "eval_loss": 0.8041123151779175, + "eval_precision": 0.907292999433662, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.8549, + "eval_samples_per_second": 459.332, + "eval_steps_per_second": 3.692, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00010610661360599156, + "loss": 0.738, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9113645109824469, + "eval_loss": 0.7966068387031555, + "eval_precision": 0.9076893913635398, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.6442, + "eval_samples_per_second": 471.993, + "eval_steps_per_second": 3.794, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00010384945737417238, + "loss": 0.7397, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00010131750605526875, + "loss": 0.735, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9121076710952039, + "eval_loss": 0.804332435131073, + "eval_precision": 0.9097350024636609, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.136, + "eval_samples_per_second": 443.464, + "eval_steps_per_second": 3.564, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 9.85255073163396e-05, + "loss": 0.7292, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.901414211525136, + "eval_loss": 0.8244433999061584, + "eval_precision": 0.906232190945862, + "eval_recall": 0.8991130820399114, + "eval_runtime": 8.1006, + "eval_samples_per_second": 445.397, + "eval_steps_per_second": 3.58, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 9.548972350315878e-05, + "loss": 0.7325, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 9.222783691814877e-05, + "loss": 0.7236, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.893569844789357, + "eval_f1": 0.8989946911162718, + "eval_loss": 0.8460181951522827, + "eval_precision": 0.9100742788902317, + "eval_recall": 0.893569844789357, + "eval_runtime": 7.883, + "eval_samples_per_second": 457.695, + "eval_steps_per_second": 3.679, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 8.875884682759899e-05, + "loss": 0.7165, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 8.510295879806365e-05, + "loss": 0.7154, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9074187854075062, + "eval_loss": 0.8101195096969604, + "eval_precision": 0.9037000363072012, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.9381, + "eval_samples_per_second": 454.518, + "eval_steps_per_second": 3.653, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 8.12814670065128e-05, + "loss": 0.7157, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9064691154662861, + "eval_loss": 0.8145021200180054, + "eval_precision": 0.9073091371333267, + "eval_recall": 0.914079822616408, + "eval_runtime": 7.9076, + "eval_samples_per_second": 456.272, + "eval_steps_per_second": 3.667, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 7.731663020973596e-05, + "loss": 0.7141, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 7.323154209542975e-05, + "loss": 0.713, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.904905291376415, + "eval_loss": 0.8159310817718506, + "eval_precision": 0.9062799076312693, + "eval_recall": 0.914079822616408, + "eval_runtime": 7.2278, + "eval_samples_per_second": 499.183, + "eval_steps_per_second": 4.012, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 6.904999677012627e-05, + "loss": 0.7102, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 6.479635016744447e-05, + "loss": 0.7138, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9123090321473447, + "eval_loss": 0.8077041506767273, + "eval_precision": 0.9126418966766847, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.5812, + "eval_samples_per_second": 475.917, + "eval_steps_per_second": 3.825, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 6.0495378183908996e-05, + "loss": 0.7031, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9172330986693904, + "eval_loss": 0.8018594980239868, + "eval_precision": 0.914982087273544, + "eval_recall": 0.9210088691796009, + "eval_runtime": 7.7209, + "eval_samples_per_second": 467.301, + "eval_steps_per_second": 3.756, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 5.617213236864303e-05, + "loss": 0.7066, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 5.185179400748782e-05, + "loss": 0.7025, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9121013658355167, + "eval_loss": 0.8063293099403381, + "eval_precision": 0.9101548570689432, + "eval_recall": 0.9157427937915743, + "eval_runtime": 7.6756, + "eval_samples_per_second": 470.059, + "eval_steps_per_second": 3.778, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 4.7559527451454613e-05, + "loss": 0.7023, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 4.332033354381553e-05, + "loss": 0.7039, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9110959153728131, + "eval_loss": 0.8079176545143127, + "eval_precision": 0.9076920456818248, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.7227, + "eval_samples_per_second": 467.196, + "eval_steps_per_second": 3.755, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 3.915890399956582e-05, + "loss": 0.7007, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8999445676274944, + "eval_f1": 0.9018109542637969, + "eval_loss": 0.8286749720573425, + "eval_precision": 0.9064557605321089, + "eval_recall": 0.8999445676274944, + "eval_runtime": 7.6911, + "eval_samples_per_second": 469.114, + "eval_steps_per_second": 3.771, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 3.509947758544222e-05, + "loss": 0.7008, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.1165698938195404e-05, + "loss": 0.7013, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9160103903036274, + "eval_loss": 0.8033446669578552, + "eval_precision": 0.9157986272829293, + "eval_recall": 0.9196230598669624, + "eval_runtime": 7.6705, + "eval_samples_per_second": 470.371, + "eval_steps_per_second": 3.781, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 2.738048084344645e-05, + "loss": 0.698, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9105505264344369, + "eval_loss": 0.8064530491828918, + "eval_precision": 0.9079365511507042, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.8595, + "eval_samples_per_second": 459.061, + "eval_steps_per_second": 3.69, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.3765870777302054e-05, + "loss": 0.6984, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.0342922488073023e-05, + "loss": 0.6962, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9254434589800443, + "eval_f1": 0.9210770013650191, + "eval_loss": 0.7944092154502869, + "eval_precision": 0.9202816675752099, + "eval_recall": 0.9254434589800443, + "eval_runtime": 7.8528, + "eval_samples_per_second": 459.456, + "eval_steps_per_second": 3.693, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.713157336608457e-05, + "loss": 0.694, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.4150528315853432e-05, + "loss": 0.6949, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9143912510261288, + "eval_loss": 0.8028455376625061, + "eval_precision": 0.9127875780947006, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.8711, + "eval_samples_per_second": 458.383, + "eval_steps_per_second": 3.684, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.1417150807032568e-05, + "loss": 0.6955, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9136648704068708, + "eval_loss": 0.8037942051887512, + "eval_precision": 0.9116034432383406, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.2549, + "eval_samples_per_second": 497.317, + "eval_steps_per_second": 3.997, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 8.947361738710916e-06, + "loss": 0.6896, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 6.755546706145982e-06, + "loss": 0.6933, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9215631929046563, + "eval_f1": 0.9176654143229099, + "eval_loss": 0.7998055219650269, + "eval_precision": 0.9156188542239444, + "eval_recall": 0.9215631929046563, + "eval_runtime": 7.9923, + "eval_samples_per_second": 451.436, + "eval_steps_per_second": 3.629, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 4.854472210065379e-06, + "loss": 0.6935, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.255211296586958e-06, + "loss": 0.6924, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9125953952455105, + "eval_loss": 0.8073236346244812, + "eval_precision": 0.9099114366758283, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.9438, + "eval_samples_per_second": 454.192, + "eval_steps_per_second": 3.651, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.967079060877154e-06, + "loss": 0.6923, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9143585078972356, + "eval_loss": 0.8037804365158081, + "eval_precision": 0.9113053415799426, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.5817, + "eval_samples_per_second": 475.881, + "eval_steps_per_second": 3.825, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 9.975783902143698e-07, + "loss": 0.694, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 3.5235626248383894e-07, + "loss": 0.6944, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9243348115299335, + "eval_f1": 0.9209391694347883, + "eval_loss": 0.7969254851341248, + "eval_precision": 0.9185286718704604, + "eval_recall": 0.9243348115299335, + "eval_runtime": 7.6301, + "eval_samples_per_second": 472.863, + "eval_steps_per_second": 3.801, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 3.517085464884012e-08, + "loss": 0.6885, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9140916683248415, + "eval_loss": 0.8012341856956482, + "eval_precision": 0.9124792719165044, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.117, + "eval_samples_per_second": 506.956, + "eval_steps_per_second": 4.075, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.18565388367817143, + "learning_rate": 0.00011334259650609864, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-fx97n43d/checkpoint-630/training_args.bin b/run-fx97n43d/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c671d6ed3c01c6edf501a4db0db79383a7a87747 --- /dev/null +++ b/run-fx97n43d/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9e4005403bfccbf15c4be41c1453abc37a8759b871629046a645f8884fab9c5 +size 4792 diff --git a/run-g9xskfow/checkpoint-1232/model.safetensors b/run-g9xskfow/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80c27995228c4384ab1cf781df8aacb8d08a8793 --- /dev/null +++ b/run-g9xskfow/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aa8b4aca63d5f8ffb309dfd189b1fea1cf3a1bfe234291fcf2bd7567f55b713 +size 198025308 diff --git a/run-g9xskfow/checkpoint-1232/optimizer.pt b/run-g9xskfow/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a8fbb450854a7538eb7b2c467ad717c5d734c01 --- /dev/null +++ b/run-g9xskfow/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4878fbd4eeecffec1ba2bb3fd5519b0b509f934061be9c9f515e0afd7abd4d07 +size 395900602 diff --git a/run-g9xskfow/checkpoint-1232/rng_state.pth b/run-g9xskfow/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-g9xskfow/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-g9xskfow/checkpoint-1232/scheduler.pt b/run-g9xskfow/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a82296cfb4ac288008bfcbc804ac2599b02bc1e --- /dev/null +++ b/run-g9xskfow/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38cdccc2db9e7a87b5d87a0bd67d6b048a56e542476c4c2a6e4310fc1f481472 +size 1064 diff --git a/run-g9xskfow/checkpoint-1232/trainer_state.json b/run-g9xskfow/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fd4281e31753bf2dcf60d789cee979bfbb2e431b --- /dev/null +++ b/run-g9xskfow/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9168514412416852, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-g9xskfow/checkpoint-1190", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 5.290736860257838e-05, + "loss": 1.2471, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8835920177383592, + "eval_loss": 0.9127263426780701, + "eval_runtime": 6.7381, + "eval_samples_per_second": 535.465, + "eval_steps_per_second": 8.459, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00010581473720515676, + "loss": 0.8832, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00015872210580773513, + "loss": 0.814, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8844235033259423, + "eval_loss": 0.8632220029830933, + "eval_runtime": 6.9366, + "eval_samples_per_second": 520.14, + "eval_steps_per_second": 8.217, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0002116294744103135, + "loss": 0.8025, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8138313889503479, + "eval_runtime": 6.8507, + "eval_samples_per_second": 526.662, + "eval_steps_per_second": 8.32, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00026453684301289186, + "loss": 0.7978, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00031744421161547027, + "loss": 0.7834, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8938470066518847, + "eval_loss": 0.8447096347808838, + "eval_runtime": 6.7009, + "eval_samples_per_second": 538.439, + "eval_steps_per_second": 8.506, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003703515802180487, + "loss": 0.7831, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.000423258948820627, + "loss": 0.7933, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.891629711751663, + "eval_loss": 0.8505235314369202, + "eval_runtime": 6.9153, + "eval_samples_per_second": 521.744, + "eval_steps_per_second": 8.243, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00047616631742320543, + "loss": 0.7899, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6723946784922394, + "eval_loss": 1.1228734254837036, + "eval_runtime": 6.9169, + "eval_samples_per_second": 521.622, + "eval_steps_per_second": 8.241, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0005290736860257837, + "loss": 0.7907, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0005819810546283621, + "loss": 0.7863, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8575388026607539, + "eval_loss": 0.9258954524993896, + "eval_runtime": 6.7423, + "eval_samples_per_second": 535.131, + "eval_steps_per_second": 8.454, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0006328517905940715, + "loss": 0.8043, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0006315903919424733, + "loss": 0.7995, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8885809312638581, + "eval_loss": 0.8476524353027344, + "eval_runtime": 6.4863, + "eval_samples_per_second": 556.246, + "eval_steps_per_second": 8.788, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0006279956320424565, + "loss": 0.8015, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8672394678492239, + "eval_loss": 0.9128186106681824, + "eval_runtime": 6.3993, + "eval_samples_per_second": 563.811, + "eval_steps_per_second": 8.907, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0006220941252386247, + "loss": 0.8036, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0006139295642225982, + "loss": 0.8014, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8841463414634146, + "eval_loss": 0.8665950298309326, + "eval_runtime": 6.5657, + "eval_samples_per_second": 549.52, + "eval_steps_per_second": 8.681, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.000603562396547611, + "loss": 0.7901, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8849778270509978, + "eval_loss": 0.8568213582038879, + "eval_runtime": 6.9321, + "eval_samples_per_second": 520.475, + "eval_steps_per_second": 8.223, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0005910693770959758, + "loss": 0.7858, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0005765429998127897, + "loss": 0.7826, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8888580931263859, + "eval_loss": 0.8588441014289856, + "eval_runtime": 6.529, + "eval_samples_per_second": 552.613, + "eval_steps_per_second": 8.73, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0005600908129131252, + "loss": 0.7839, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0005418346226326737, + "loss": 0.7735, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8860864745011087, + "eval_loss": 0.8602633476257324, + "eval_runtime": 6.6022, + "eval_samples_per_second": 546.481, + "eval_steps_per_second": 8.633, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0005219095914169925, + "loss": 0.7683, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8899667405764967, + "eval_loss": 0.8505738973617554, + "eval_runtime": 6.729, + "eval_samples_per_second": 536.186, + "eval_steps_per_second": 8.471, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0005004632372260478, + "loss": 0.7736, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0004776543413628498, + "loss": 0.7581, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8927383592017738, + "eval_loss": 0.8391976952552795, + "eval_runtime": 6.9479, + "eval_samples_per_second": 519.292, + "eval_steps_per_second": 8.204, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00045365177291223943, + "loss": 0.7608, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0004286332384932652, + "loss": 0.7559, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8719512195121951, + "eval_loss": 0.8746176958084106, + "eval_runtime": 6.8102, + "eval_samples_per_second": 529.795, + "eval_steps_per_second": 8.37, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00040278396658155013, + "loss": 0.753, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8794345898004434, + "eval_loss": 0.8620092272758484, + "eval_runtime": 6.755, + "eval_samples_per_second": 534.123, + "eval_steps_per_second": 8.438, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00037629533614246403, + "loss": 0.7445, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0003493634597282215, + "loss": 0.7425, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8251734972000122, + "eval_runtime": 6.5257, + "eval_samples_per_second": 552.888, + "eval_steps_per_second": 8.735, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0003221877315291593, + "loss": 0.731, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00029496935112891253, + "loss": 0.7323, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9002217294900222, + "eval_loss": 0.8381521701812744, + "eval_runtime": 6.7628, + "eval_samples_per_second": 533.507, + "eval_steps_per_second": 8.428, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00026790983389309107, + "loss": 0.7426, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8960643015521065, + "eval_loss": 0.8361108899116516, + "eval_runtime": 6.747, + "eval_samples_per_second": 534.758, + "eval_steps_per_second": 8.448, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00024120951902001692, + "loss": 0.7211, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00021506608629939217, + "loss": 0.7172, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.905210643015521, + "eval_loss": 0.8313592076301575, + "eval_runtime": 6.8175, + "eval_samples_per_second": 529.223, + "eval_steps_per_second": 8.361, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00018967309256029798, + "loss": 0.7166, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9077050997782705, + "eval_loss": 0.8236117959022522, + "eval_runtime": 6.9363, + "eval_samples_per_second": 520.161, + "eval_steps_per_second": 8.218, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.00016521853864415187, + "loss": 0.7168, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.0001418834775122533, + "loss": 0.712, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9113082039911308, + "eval_loss": 0.8170432448387146, + "eval_runtime": 6.7652, + "eval_samples_per_second": 533.314, + "eval_steps_per_second": 8.425, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.00011984067379300337, + "loss": 0.7112, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 9.925332469304095e-05, + "loss": 0.7043, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8157918453216553, + "eval_runtime": 6.4931, + "eval_samples_per_second": 555.663, + "eval_steps_per_second": 8.778, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 8.027385174222248e-05, + "loss": 0.7002, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.806347668170929, + "eval_runtime": 6.5533, + "eval_samples_per_second": 550.559, + "eval_steps_per_second": 8.698, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 6.304277231794241e-05, + "loss": 0.7022, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 4.7687659303634066e-05, + "loss": 0.6973, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8112438321113586, + "eval_runtime": 6.9093, + "eval_samples_per_second": 522.191, + "eval_steps_per_second": 8.25, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 3.432219658377394e-05, + "loss": 0.6993, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 2.3045337368175907e-05, + "loss": 0.7001, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.809345543384552, + "eval_runtime": 6.7325, + "eval_samples_per_second": 535.905, + "eval_steps_per_second": 8.466, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.3940571577046412e-05, + "loss": 0.6955, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8096058964729309, + "eval_runtime": 6.3305, + "eval_samples_per_second": 569.939, + "eval_steps_per_second": 9.004, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 7.075307710825677e-06, + "loss": 0.6907, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 2.5003737812346822e-06, + "loss": 0.6935, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8111439347267151, + "eval_runtime": 6.7584, + "eval_samples_per_second": 533.857, + "eval_steps_per_second": 8.434, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0006328535244385337, + "metric": "eval/loss", + "warmup_ratio": 0.24677624017207336 + } +} diff --git a/run-g9xskfow/checkpoint-1232/training_args.bin b/run-g9xskfow/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dd6532b01338c5ac962e42ad9d5ff897b6d34ca6 --- /dev/null +++ b/run-g9xskfow/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5121e0d6105565e15b87ba8f13e23688f8446b5ab9856cd35573ea29dd7351c5 +size 4792 diff --git a/run-g9xskfow/checkpoint-1260/model.safetensors b/run-g9xskfow/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..090f4393f9e3c5f0bb4dbf2a9bca48d5fc007fdb --- /dev/null +++ b/run-g9xskfow/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d38b83573eed41ddde6f8c6cb504728c46f09cb93c8667a10b844492a87bf196 +size 198025308 diff --git a/run-g9xskfow/checkpoint-1260/optimizer.pt b/run-g9xskfow/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..54f33d1f0e1db0f8c797552228b7b653e369bd12 --- /dev/null +++ b/run-g9xskfow/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fbbae4a9b97b8755be02a19c390ea18bfc9f309c34a6ed6b5e268fdb457aa6b +size 395900602 diff --git a/run-g9xskfow/checkpoint-1260/rng_state.pth b/run-g9xskfow/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-g9xskfow/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-g9xskfow/checkpoint-1260/scheduler.pt b/run-g9xskfow/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc708db6d739b3894df3fedec18c97dcaecf0f7b --- /dev/null +++ b/run-g9xskfow/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ba3aeed1f08e6d92d56a714a37a47a95237e90edb6d58819cc98f5a7c1cc6a4 +size 1064 diff --git a/run-g9xskfow/checkpoint-1260/trainer_state.json b/run-g9xskfow/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e61a7dcf6e51e33b8ae6c6a39ed8cc68917537db --- /dev/null +++ b/run-g9xskfow/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9210088691796009, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-g9xskfow/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 5.290736860257838e-05, + "loss": 1.2471, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8835920177383592, + "eval_loss": 0.9127263426780701, + "eval_runtime": 6.7381, + "eval_samples_per_second": 535.465, + "eval_steps_per_second": 8.459, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00010581473720515676, + "loss": 0.8832, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00015872210580773513, + "loss": 0.814, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8844235033259423, + "eval_loss": 0.8632220029830933, + "eval_runtime": 6.9366, + "eval_samples_per_second": 520.14, + "eval_steps_per_second": 8.217, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0002116294744103135, + "loss": 0.8025, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8138313889503479, + "eval_runtime": 6.8507, + "eval_samples_per_second": 526.662, + "eval_steps_per_second": 8.32, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00026453684301289186, + "loss": 0.7978, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00031744421161547027, + "loss": 0.7834, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8938470066518847, + "eval_loss": 0.8447096347808838, + "eval_runtime": 6.7009, + "eval_samples_per_second": 538.439, + "eval_steps_per_second": 8.506, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003703515802180487, + "loss": 0.7831, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.000423258948820627, + "loss": 0.7933, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.891629711751663, + "eval_loss": 0.8505235314369202, + "eval_runtime": 6.9153, + "eval_samples_per_second": 521.744, + "eval_steps_per_second": 8.243, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00047616631742320543, + "loss": 0.7899, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6723946784922394, + "eval_loss": 1.1228734254837036, + "eval_runtime": 6.9169, + "eval_samples_per_second": 521.622, + "eval_steps_per_second": 8.241, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0005290736860257837, + "loss": 0.7907, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0005819810546283621, + "loss": 0.7863, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8575388026607539, + "eval_loss": 0.9258954524993896, + "eval_runtime": 6.7423, + "eval_samples_per_second": 535.131, + "eval_steps_per_second": 8.454, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0006328517905940715, + "loss": 0.8043, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0006315903919424733, + "loss": 0.7995, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8885809312638581, + "eval_loss": 0.8476524353027344, + "eval_runtime": 6.4863, + "eval_samples_per_second": 556.246, + "eval_steps_per_second": 8.788, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0006279956320424565, + "loss": 0.8015, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8672394678492239, + "eval_loss": 0.9128186106681824, + "eval_runtime": 6.3993, + "eval_samples_per_second": 563.811, + "eval_steps_per_second": 8.907, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0006220941252386247, + "loss": 0.8036, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0006139295642225982, + "loss": 0.8014, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8841463414634146, + "eval_loss": 0.8665950298309326, + "eval_runtime": 6.5657, + "eval_samples_per_second": 549.52, + "eval_steps_per_second": 8.681, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.000603562396547611, + "loss": 0.7901, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8849778270509978, + "eval_loss": 0.8568213582038879, + "eval_runtime": 6.9321, + "eval_samples_per_second": 520.475, + "eval_steps_per_second": 8.223, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0005910693770959758, + "loss": 0.7858, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0005765429998127897, + "loss": 0.7826, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8888580931263859, + "eval_loss": 0.8588441014289856, + "eval_runtime": 6.529, + "eval_samples_per_second": 552.613, + "eval_steps_per_second": 8.73, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0005600908129131252, + "loss": 0.7839, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0005418346226326737, + "loss": 0.7735, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8860864745011087, + "eval_loss": 0.8602633476257324, + "eval_runtime": 6.6022, + "eval_samples_per_second": 546.481, + "eval_steps_per_second": 8.633, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0005219095914169925, + "loss": 0.7683, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8899667405764967, + "eval_loss": 0.8505738973617554, + "eval_runtime": 6.729, + "eval_samples_per_second": 536.186, + "eval_steps_per_second": 8.471, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0005004632372260478, + "loss": 0.7736, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0004776543413628498, + "loss": 0.7581, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8927383592017738, + "eval_loss": 0.8391976952552795, + "eval_runtime": 6.9479, + "eval_samples_per_second": 519.292, + "eval_steps_per_second": 8.204, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00045365177291223943, + "loss": 0.7608, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0004286332384932652, + "loss": 0.7559, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8719512195121951, + "eval_loss": 0.8746176958084106, + "eval_runtime": 6.8102, + "eval_samples_per_second": 529.795, + "eval_steps_per_second": 8.37, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00040278396658155013, + "loss": 0.753, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8794345898004434, + "eval_loss": 0.8620092272758484, + "eval_runtime": 6.755, + "eval_samples_per_second": 534.123, + "eval_steps_per_second": 8.438, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00037629533614246403, + "loss": 0.7445, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0003493634597282215, + "loss": 0.7425, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8251734972000122, + "eval_runtime": 6.5257, + "eval_samples_per_second": 552.888, + "eval_steps_per_second": 8.735, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0003221877315291593, + "loss": 0.731, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00029496935112891253, + "loss": 0.7323, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9002217294900222, + "eval_loss": 0.8381521701812744, + "eval_runtime": 6.7628, + "eval_samples_per_second": 533.507, + "eval_steps_per_second": 8.428, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00026790983389309107, + "loss": 0.7426, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8960643015521065, + "eval_loss": 0.8361108899116516, + "eval_runtime": 6.747, + "eval_samples_per_second": 534.758, + "eval_steps_per_second": 8.448, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00024120951902001692, + "loss": 0.7211, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00021506608629939217, + "loss": 0.7172, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.905210643015521, + "eval_loss": 0.8313592076301575, + "eval_runtime": 6.8175, + "eval_samples_per_second": 529.223, + "eval_steps_per_second": 8.361, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00018967309256029798, + "loss": 0.7166, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9077050997782705, + "eval_loss": 0.8236117959022522, + "eval_runtime": 6.9363, + "eval_samples_per_second": 520.161, + "eval_steps_per_second": 8.218, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.00016521853864415187, + "loss": 0.7168, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.0001418834775122533, + "loss": 0.712, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9113082039911308, + "eval_loss": 0.8170432448387146, + "eval_runtime": 6.7652, + "eval_samples_per_second": 533.314, + "eval_steps_per_second": 8.425, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.00011984067379300337, + "loss": 0.7112, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 9.925332469304095e-05, + "loss": 0.7043, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8157918453216553, + "eval_runtime": 6.4931, + "eval_samples_per_second": 555.663, + "eval_steps_per_second": 8.778, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 8.027385174222248e-05, + "loss": 0.7002, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.806347668170929, + "eval_runtime": 6.5533, + "eval_samples_per_second": 550.559, + "eval_steps_per_second": 8.698, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 6.304277231794241e-05, + "loss": 0.7022, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 4.7687659303634066e-05, + "loss": 0.6973, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8112438321113586, + "eval_runtime": 6.9093, + "eval_samples_per_second": 522.191, + "eval_steps_per_second": 8.25, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 3.432219658377394e-05, + "loss": 0.6993, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 2.3045337368175907e-05, + "loss": 0.7001, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.809345543384552, + "eval_runtime": 6.7325, + "eval_samples_per_second": 535.905, + "eval_steps_per_second": 8.466, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.3940571577046412e-05, + "loss": 0.6955, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8096058964729309, + "eval_runtime": 6.3305, + "eval_samples_per_second": 569.939, + "eval_steps_per_second": 9.004, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 7.075307710825677e-06, + "loss": 0.6907, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 2.5003737812346822e-06, + "loss": 0.6935, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8111439347267151, + "eval_runtime": 6.7584, + "eval_samples_per_second": 533.857, + "eval_steps_per_second": 8.434, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 2.496409984596498e-07, + "loss": 0.6935, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9210088691796009, + "eval_loss": 0.8020520210266113, + "eval_runtime": 6.8722, + "eval_samples_per_second": 525.012, + "eval_steps_per_second": 8.294, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0006328535244385337, + "metric": "eval/loss", + "warmup_ratio": 0.24677624017207336 + } +} diff --git a/run-g9xskfow/checkpoint-1260/training_args.bin b/run-g9xskfow/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dd6532b01338c5ac962e42ad9d5ff897b6d34ca6 --- /dev/null +++ b/run-g9xskfow/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5121e0d6105565e15b87ba8f13e23688f8446b5ab9856cd35573ea29dd7351c5 +size 4792 diff --git a/run-gp6puav7/checkpoint-63/model.safetensors b/run-gp6puav7/checkpoint-63/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1732c79a5a951fd85189803873b5dc2642d64d92 --- /dev/null +++ b/run-gp6puav7/checkpoint-63/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa0fd629dcf8cc678febf24bc62f2e75a498145fa16555556a01c018715cb4b6 +size 198025308 diff --git a/run-gp6puav7/checkpoint-63/optimizer.pt b/run-gp6puav7/checkpoint-63/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0fee3e35231e9edf1a16acce3a1898a5c4897b27 --- /dev/null +++ b/run-gp6puav7/checkpoint-63/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c92160e14845d46c913e48e87a22ff09a60a7c83b3c34b890cdafe6c0c3fabb +size 395900602 diff --git a/run-gp6puav7/checkpoint-63/rng_state.pth b/run-gp6puav7/checkpoint-63/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ef2aea881ced28619b068a28b5a7bf3305e7bea5 --- /dev/null +++ b/run-gp6puav7/checkpoint-63/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6473959fb7ade82aadc5c802d009a573c828bb50619e54545254d5729d5ce26d +size 14244 diff --git a/run-gp6puav7/checkpoint-63/scheduler.pt b/run-gp6puav7/checkpoint-63/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d58c672c7ae9d1f25a6d6f5350d9babb1692edb --- /dev/null +++ b/run-gp6puav7/checkpoint-63/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54d1133dda58ccad36d95751873a6d53be3c5a4c43e4454b2191beb82ae19994 +size 1064 diff --git a/run-gp6puav7/checkpoint-63/trainer_state.json b/run-gp6puav7/checkpoint-63/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a4eac2c35d321110878c2c2cb5221569f59e3d4d --- /dev/null +++ b/run-gp6puav7/checkpoint-63/trainer_state.json @@ -0,0 +1,88 @@ +{ + "best_metric": 0.9024390243902439, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-gp6puav7/checkpoint-63", + "epoch": 2.9647058823529413, + "eval_steps": 500, + "global_step": 63, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.8255373206131346e-05, + "loss": 1.4232, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.8284368070953437, + "eval_loss": 0.968960702419281, + "eval_precision": 0.8284368070953437, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.2975, + "eval_samples_per_second": 434.829, + "eval_steps_per_second": 3.495, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 5.651074641226269e-05, + "loss": 1.0149, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 8.476611961839403e-05, + "loss": 0.905, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8844235033259423, + "eval_f1": 0.8844235033259423, + "eval_loss": 0.9361611604690552, + "eval_precision": 0.8844235033259423, + "eval_recall": 0.8844235033259423, + "eval_runtime": 7.9986, + "eval_samples_per_second": 451.077, + "eval_steps_per_second": 3.626, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00011302149282452538, + "loss": 0.8352, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.9024390243902439, + "eval_loss": 0.8242219686508179, + "eval_precision": 0.9024390243902439, + "eval_recall": 0.9024390243902439, + "eval_runtime": 8.2211, + "eval_samples_per_second": 438.873, + "eval_steps_per_second": 3.528, + "step": 63 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.25066211595717836, + "learning_rate": 0.00020648157342942136, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-gp6puav7/checkpoint-63/training_args.bin b/run-gp6puav7/checkpoint-63/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d6ffb56fbedc2f3541c86184ae544ccc94840b43 --- /dev/null +++ b/run-gp6puav7/checkpoint-63/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9621ac2071a17af3fcdd1e8756af787824296705efeef501b42fe8c7f0efb37e +size 4792 diff --git a/run-gp6puav7/checkpoint-85/model.safetensors b/run-gp6puav7/checkpoint-85/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..58f0a9bda45c4becf924b6dedd9ecd1215a59a4f --- /dev/null +++ b/run-gp6puav7/checkpoint-85/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d200848f54f8b025e26378f411124109c3fddf35eae51d1f52e19baf65900ffc +size 198025308 diff --git a/run-gp6puav7/checkpoint-85/optimizer.pt b/run-gp6puav7/checkpoint-85/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6736230be485fff4685a94c36dbddeabdfbbd4f7 --- /dev/null +++ b/run-gp6puav7/checkpoint-85/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e60789df2af0e40f747c2df6d4007632fdc5a7965d5b4cf3d356e777d10cd036 +size 395900602 diff --git a/run-gp6puav7/checkpoint-85/rng_state.pth b/run-gp6puav7/checkpoint-85/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..148864de7e559ff84ad8979d5c33cb4c3b4b87ac --- /dev/null +++ b/run-gp6puav7/checkpoint-85/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c4b8e258e21361fedb48c74670f6389fd419f3ab62efeaf13b8e8713245d5ac +size 14244 diff --git a/run-gp6puav7/checkpoint-85/scheduler.pt b/run-gp6puav7/checkpoint-85/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ffc25fa41cbeec3db0a135b1145e0e23a46b8e3 --- /dev/null +++ b/run-gp6puav7/checkpoint-85/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d9ee19314f14b65cef039e8be9e835c56e3442fa7ca2039bde85b704be4be64 +size 1064 diff --git a/run-gp6puav7/checkpoint-85/trainer_state.json b/run-gp6puav7/checkpoint-85/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ed49e0d3fb4c145957a1739789cb7ef9841da511 --- /dev/null +++ b/run-gp6puav7/checkpoint-85/trainer_state.json @@ -0,0 +1,112 @@ +{ + "best_metric": 0.9024390243902439, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-gp6puav7/checkpoint-63", + "epoch": 4.0, + "eval_steps": 500, + "global_step": 85, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.8255373206131346e-05, + "loss": 1.4232, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.8284368070953437, + "eval_loss": 0.968960702419281, + "eval_precision": 0.8284368070953437, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.2975, + "eval_samples_per_second": 434.829, + "eval_steps_per_second": 3.495, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 5.651074641226269e-05, + "loss": 1.0149, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 8.476611961839403e-05, + "loss": 0.905, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8844235033259423, + "eval_f1": 0.8844235033259423, + "eval_loss": 0.9361611604690552, + "eval_precision": 0.8844235033259423, + "eval_recall": 0.8844235033259423, + "eval_runtime": 7.9986, + "eval_samples_per_second": 451.077, + "eval_steps_per_second": 3.626, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00011302149282452538, + "loss": 0.8352, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.9024390243902439, + "eval_loss": 0.8242219686508179, + "eval_precision": 0.9024390243902439, + "eval_recall": 0.9024390243902439, + "eval_runtime": 8.2211, + "eval_samples_per_second": 438.873, + "eval_steps_per_second": 3.528, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.00014127686603065671, + "loss": 0.8019, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00016953223923678806, + "loss": 0.7901, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8871951219512195, + "eval_f1": 0.8871951219512195, + "eval_loss": 0.8388084769248962, + "eval_precision": 0.8871951219512195, + "eval_recall": 0.8871951219512195, + "eval_runtime": 7.7419, + "eval_samples_per_second": 466.033, + "eval_steps_per_second": 3.746, + "step": 85 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.25066211595717836, + "learning_rate": 0.00020648157342942136, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-gp6puav7/checkpoint-85/training_args.bin b/run-gp6puav7/checkpoint-85/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d6ffb56fbedc2f3541c86184ae544ccc94840b43 --- /dev/null +++ b/run-gp6puav7/checkpoint-85/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9621ac2071a17af3fcdd1e8756af787824296705efeef501b42fe8c7f0efb37e +size 4792 diff --git a/run-h3xp0oba/checkpoint-616/model.safetensors b/run-h3xp0oba/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..131d8f173c5cb447851c33bef19c4c8c80731210 --- /dev/null +++ b/run-h3xp0oba/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9715b7d21e8988735847ac2ae6820b1caa9477abeadce452a254c0e7035a2a3 +size 198025308 diff --git a/run-h3xp0oba/checkpoint-616/optimizer.pt b/run-h3xp0oba/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e2846c2175e02c1dbbc683ef5e3e63c31d270fd --- /dev/null +++ b/run-h3xp0oba/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7993e6aa7f5343346ac1b884bc3ef10046a3a4d2a86fd77931736bd6765a55a9 +size 395900602 diff --git a/run-h3xp0oba/checkpoint-616/rng_state.pth b/run-h3xp0oba/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-h3xp0oba/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-h3xp0oba/checkpoint-616/scheduler.pt b/run-h3xp0oba/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..35759a7f43765e3580054ae2a3b4695dcd3ce25f --- /dev/null +++ b/run-h3xp0oba/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6b2eff809d66969c044aa9786843c960c95a96c21d6ba788474177cb080bbc6 +size 1064 diff --git a/run-h3xp0oba/checkpoint-616/trainer_state.json b/run-h3xp0oba/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e3cdf71315965646e2904c5004e0b03c3eee6bbd --- /dev/null +++ b/run-h3xp0oba/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.919707369689397, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-h3xp0oba/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.708535844509082e-05, + "loss": 1.3707, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9541526436805725, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.7987, + "eval_samples_per_second": 462.643, + "eval_steps_per_second": 3.719, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 9.417071689018165e-05, + "loss": 0.9865, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00014125607533527245, + "loss": 0.8639, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8813747228381374, + "eval_f1": 0.8808939595284657, + "eval_loss": 0.8977882862091064, + "eval_precision": 0.8904300095433347, + "eval_recall": 0.8813747228381374, + "eval_runtime": 7.28, + "eval_samples_per_second": 495.602, + "eval_steps_per_second": 3.983, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001883414337803633, + "loss": 0.8098, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9085949730649107, + "eval_loss": 0.8042497038841248, + "eval_precision": 0.9056541265357774, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.8766, + "eval_samples_per_second": 458.063, + "eval_steps_per_second": 3.682, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0002354267922254541, + "loss": 0.7938, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.0002825121506705449, + "loss": 0.7793, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8896895787139689, + "eval_f1": 0.8681465686067364, + "eval_loss": 0.8499795794487, + "eval_precision": 0.8779578030766786, + "eval_recall": 0.8896895787139689, + "eval_runtime": 8.0049, + "eval_samples_per_second": 450.725, + "eval_steps_per_second": 3.623, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003295975091156357, + "loss": 0.7901, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003438451065959332, + "loss": 0.7687, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.8971876382462316, + "eval_loss": 0.8221213817596436, + "eval_precision": 0.8958006522706893, + "eval_recall": 0.9065964523281597, + "eval_runtime": 7.7963, + "eval_samples_per_second": 462.783, + "eval_steps_per_second": 3.72, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0003426516734086289, + "loss": 0.7745, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.88470066518847, + "eval_f1": 0.8882250321236937, + "eval_loss": 0.8452194929122925, + "eval_precision": 0.8951227807591345, + "eval_recall": 0.88470066518847, + "eval_runtime": 8.2102, + "eval_samples_per_second": 439.455, + "eval_steps_per_second": 3.532, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00034046450666518654, + "loss": 0.7715, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0003372963457917926, + "loss": 0.7595, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9041019955654102, + "eval_f1": 0.8901171399145966, + "eval_loss": 0.8262869119644165, + "eval_precision": 0.8926047678427367, + "eval_recall": 0.9041019955654102, + "eval_runtime": 7.9303, + "eval_samples_per_second": 454.961, + "eval_steps_per_second": 3.657, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0003331656441376095, + "loss": 0.7505, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0003280964614909359, + "loss": 0.7459, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8137472283813747, + "eval_f1": 0.8398416564115776, + "eval_loss": 0.9601730108261108, + "eval_precision": 0.8950942001259197, + "eval_recall": 0.8137472283813747, + "eval_runtime": 7.743, + "eval_samples_per_second": 465.97, + "eval_steps_per_second": 3.745, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0003221183239399616, + "loss": 0.7394, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.8982832021964763, + "eval_loss": 0.8235355615615845, + "eval_precision": 0.8971396775857995, + "eval_recall": 0.9090909090909091, + "eval_runtime": 7.9345, + "eval_samples_per_second": 454.72, + "eval_steps_per_second": 3.655, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0003152660518943747, + "loss": 0.7395, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.000307579557269531, + "loss": 0.7362, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9085365853658537, + "eval_f1": 0.8969664505416969, + "eval_loss": 0.8247323632240295, + "eval_precision": 0.8991603104885488, + "eval_recall": 0.9085365853658537, + "eval_runtime": 7.6618, + "eval_samples_per_second": 470.909, + "eval_steps_per_second": 3.785, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0002991036110145131, + "loss": 0.7291, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8999445676274944, + "eval_f1": 0.8980527824735403, + "eval_loss": 0.8263599872589111, + "eval_precision": 0.9001406800456653, + "eval_recall": 0.8999445676274944, + "eval_runtime": 8.0782, + "eval_samples_per_second": 446.635, + "eval_steps_per_second": 3.59, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.000289887582338138, + "loss": 0.7343, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0002799851511518284, + "loss": 0.7225, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8888580931263859, + "eval_f1": 0.892262887842489, + "eval_loss": 0.8483912348747253, + "eval_precision": 0.9007870145198834, + "eval_recall": 0.8888580931263859, + "eval_runtime": 7.4151, + "eval_samples_per_second": 486.572, + "eval_steps_per_second": 3.911, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00026945399540425556, + "loss": 0.7154, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.000258355455128915, + "loss": 0.7149, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.8913716629689546, + "eval_loss": 0.8356512188911438, + "eval_precision": 0.8888349983595902, + "eval_recall": 0.9024390243902439, + "eval_runtime": 8.0725, + "eval_samples_per_second": 446.948, + "eval_steps_per_second": 3.592, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0002467541751614317, + "loss": 0.716, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9046147830123301, + "eval_loss": 0.8269001245498657, + "eval_precision": 0.9082255172386686, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.982, + "eval_samples_per_second": 452.014, + "eval_steps_per_second": 3.633, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00023471772860763154, + "loss": 0.7123, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00022231622325553658, + "loss": 0.7117, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.9085224441891459, + "eval_loss": 0.8177808523178101, + "eval_precision": 0.9062066002075595, + "eval_recall": 0.9113082039911308, + "eval_runtime": 7.8494, + "eval_samples_per_second": 459.656, + "eval_steps_per_second": 3.695, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0002096218932237875, + "loss": 0.7054, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00019670867822498743, + "loss": 0.7117, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9016075388026608, + "eval_f1": 0.896962694654186, + "eval_loss": 0.8360165357589722, + "eval_precision": 0.8957397457160988, + "eval_recall": 0.9016075388026608, + "eval_runtime": 7.6561, + "eval_samples_per_second": 471.258, + "eval_steps_per_second": 3.788, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00018365179289459978, + "loss": 0.7047, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.901781957228825, + "eval_loss": 0.8226761817932129, + "eval_precision": 0.9071440802827015, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.7159, + "eval_samples_per_second": 467.607, + "eval_steps_per_second": 3.758, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00017052728869389942, + "loss": 0.7048, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00015741161093872646, + "loss": 0.701, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9085365853658537, + "eval_f1": 0.896764975630733, + "eval_loss": 0.8337616324424744, + "eval_precision": 0.8984191921102117, + "eval_recall": 0.9085365853658537, + "eval_runtime": 7.7107, + "eval_samples_per_second": 467.921, + "eval_steps_per_second": 3.761, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.0001443811535341854, + "loss": 0.7008, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00013151181400879235, + "loss": 0.6973, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9020413748945396, + "eval_loss": 0.8200169205665588, + "eval_precision": 0.9018709427639722, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.9098, + "eval_samples_per_second": 456.141, + "eval_steps_per_second": 3.666, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.0001188785514398297, + "loss": 0.6966, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9094789276982368, + "eval_loss": 0.8151785135269165, + "eval_precision": 0.9074342779532517, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.7446, + "eval_samples_per_second": 465.874, + "eval_steps_per_second": 3.745, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00010655494984482727, + "loss": 0.6953, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 9.461278958225152e-05, + "loss": 0.6932, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9123797089014785, + "eval_loss": 0.8112426996231079, + "eval_precision": 0.910060537408767, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.8945, + "eval_samples_per_second": 457.027, + "eval_steps_per_second": 3.673, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 8.312162925783138e-05, + "loss": 0.6905, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9067208945732566, + "eval_loss": 0.8171787261962891, + "eval_precision": 0.903844360120071, + "eval_recall": 0.9121396895787139, + "eval_runtime": 8.066, + "eval_samples_per_second": 447.311, + "eval_steps_per_second": 3.595, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 7.2148400571762e-05, + "loss": 0.6947, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 6.175701846664736e-05, + "loss": 0.6921, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9154153170991405, + "eval_loss": 0.8030020594596863, + "eval_precision": 0.9125617708072467, + "eval_recall": 0.9190687361419069, + "eval_runtime": 7.4174, + "eval_samples_per_second": 486.423, + "eval_steps_per_second": 3.91, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 5.200800884692487e-05, + "loss": 0.69, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 4.2958156038166964e-05, + "loss": 0.6878, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9137141042856662, + "eval_loss": 0.8066474199295044, + "eval_precision": 0.9129350835355605, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.349, + "eval_samples_per_second": 432.145, + "eval_steps_per_second": 3.473, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 3.466017203967616e-05, + "loss": 0.6902, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9129597537646226, + "eval_loss": 0.8081188797950745, + "eval_precision": 0.9115179393978867, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.9893, + "eval_samples_per_second": 451.604, + "eval_steps_per_second": 3.63, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 2.716238949685372e-05, + "loss": 0.6844, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 2.0508480181663194e-05, + "loss": 0.6855, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9221175166297118, + "eval_f1": 0.9182733671123653, + "eval_loss": 0.8042904138565063, + "eval_precision": 0.9166053258497058, + "eval_recall": 0.9221175166297118, + "eval_runtime": 7.9624, + "eval_samples_per_second": 453.127, + "eval_steps_per_second": 3.642, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.4737200620933608e-05, + "loss": 0.6886, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 9.88216635412263e-06, + "loss": 0.6869, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9144784355680313, + "eval_loss": 0.8081766963005066, + "eval_precision": 0.9117954781523269, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.1522, + "eval_samples_per_second": 442.578, + "eval_steps_per_second": 3.557, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 5.971656135403825e-06, + "loss": 0.6872, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9154760990663301, + "eval_loss": 0.8012545108795166, + "eval_precision": 0.9128092439274615, + "eval_recall": 0.9190687361419069, + "eval_runtime": 8.2123, + "eval_samples_per_second": 439.34, + "eval_steps_per_second": 3.531, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 3.028447220526815e-06, + "loss": 0.6887, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.0696826978430148e-06, + "loss": 0.688, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9232261640798226, + "eval_f1": 0.919707369689397, + "eval_loss": 0.8022695183753967, + "eval_precision": 0.9176418791088158, + "eval_recall": 0.9232261640798226, + "eval_runtime": 7.9087, + "eval_samples_per_second": 456.209, + "eval_steps_per_second": 3.667, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.3805565983260753, + "learning_rate": 0.0003440853117141252, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-h3xp0oba/checkpoint-616/training_args.bin b/run-h3xp0oba/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..59769f0a26cc3715ddd41e23fc561b3356d44cbd --- /dev/null +++ b/run-h3xp0oba/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca8def5c23cb34d73de5d8c3661f5a54654711eed266d5dddb7724299103b835 +size 4792 diff --git a/run-h3xp0oba/checkpoint-630/model.safetensors b/run-h3xp0oba/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3dd881a98df89fbb46771b9ea8c8a954e1d2c2b --- /dev/null +++ b/run-h3xp0oba/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26ad4f061806f7b10b0b277b6c621c64efcd83819054653fda940ec1b3fdb31f +size 198025308 diff --git a/run-h3xp0oba/checkpoint-630/optimizer.pt b/run-h3xp0oba/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..17241ac92af504128603c205cee423b706b1a4c0 --- /dev/null +++ b/run-h3xp0oba/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34b5479aba61ce9aca3b88a83bb0e81942e3d511c7adc81223b4e56d82ccb473 +size 395900602 diff --git a/run-h3xp0oba/checkpoint-630/rng_state.pth b/run-h3xp0oba/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-h3xp0oba/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-h3xp0oba/checkpoint-630/scheduler.pt b/run-h3xp0oba/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..088310351932f8d33644f83ac827a3f73e542dd8 --- /dev/null +++ b/run-h3xp0oba/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53dbade7e2543a1988d682ac8bdb99340e2cc9c5754b08fd0516a0713119c3ab +size 1064 diff --git a/run-h3xp0oba/checkpoint-630/trainer_state.json b/run-h3xp0oba/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..98a0b5eedd4508e079bc69a7e5847e1f11222005 --- /dev/null +++ b/run-h3xp0oba/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.919707369689397, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-h3xp0oba/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.708535844509082e-05, + "loss": 1.3707, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9541526436805725, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.7987, + "eval_samples_per_second": 462.643, + "eval_steps_per_second": 3.719, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 9.417071689018165e-05, + "loss": 0.9865, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00014125607533527245, + "loss": 0.8639, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8813747228381374, + "eval_f1": 0.8808939595284657, + "eval_loss": 0.8977882862091064, + "eval_precision": 0.8904300095433347, + "eval_recall": 0.8813747228381374, + "eval_runtime": 7.28, + "eval_samples_per_second": 495.602, + "eval_steps_per_second": 3.983, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001883414337803633, + "loss": 0.8098, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9085949730649107, + "eval_loss": 0.8042497038841248, + "eval_precision": 0.9056541265357774, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.8766, + "eval_samples_per_second": 458.063, + "eval_steps_per_second": 3.682, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0002354267922254541, + "loss": 0.7938, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.0002825121506705449, + "loss": 0.7793, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8896895787139689, + "eval_f1": 0.8681465686067364, + "eval_loss": 0.8499795794487, + "eval_precision": 0.8779578030766786, + "eval_recall": 0.8896895787139689, + "eval_runtime": 8.0049, + "eval_samples_per_second": 450.725, + "eval_steps_per_second": 3.623, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003295975091156357, + "loss": 0.7901, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003438451065959332, + "loss": 0.7687, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.8971876382462316, + "eval_loss": 0.8221213817596436, + "eval_precision": 0.8958006522706893, + "eval_recall": 0.9065964523281597, + "eval_runtime": 7.7963, + "eval_samples_per_second": 462.783, + "eval_steps_per_second": 3.72, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0003426516734086289, + "loss": 0.7745, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.88470066518847, + "eval_f1": 0.8882250321236937, + "eval_loss": 0.8452194929122925, + "eval_precision": 0.8951227807591345, + "eval_recall": 0.88470066518847, + "eval_runtime": 8.2102, + "eval_samples_per_second": 439.455, + "eval_steps_per_second": 3.532, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00034046450666518654, + "loss": 0.7715, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0003372963457917926, + "loss": 0.7595, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9041019955654102, + "eval_f1": 0.8901171399145966, + "eval_loss": 0.8262869119644165, + "eval_precision": 0.8926047678427367, + "eval_recall": 0.9041019955654102, + "eval_runtime": 7.9303, + "eval_samples_per_second": 454.961, + "eval_steps_per_second": 3.657, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0003331656441376095, + "loss": 0.7505, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0003280964614909359, + "loss": 0.7459, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8137472283813747, + "eval_f1": 0.8398416564115776, + "eval_loss": 0.9601730108261108, + "eval_precision": 0.8950942001259197, + "eval_recall": 0.8137472283813747, + "eval_runtime": 7.743, + "eval_samples_per_second": 465.97, + "eval_steps_per_second": 3.745, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0003221183239399616, + "loss": 0.7394, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.8982832021964763, + "eval_loss": 0.8235355615615845, + "eval_precision": 0.8971396775857995, + "eval_recall": 0.9090909090909091, + "eval_runtime": 7.9345, + "eval_samples_per_second": 454.72, + "eval_steps_per_second": 3.655, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0003152660518943747, + "loss": 0.7395, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.000307579557269531, + "loss": 0.7362, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9085365853658537, + "eval_f1": 0.8969664505416969, + "eval_loss": 0.8247323632240295, + "eval_precision": 0.8991603104885488, + "eval_recall": 0.9085365853658537, + "eval_runtime": 7.6618, + "eval_samples_per_second": 470.909, + "eval_steps_per_second": 3.785, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0002991036110145131, + "loss": 0.7291, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8999445676274944, + "eval_f1": 0.8980527824735403, + "eval_loss": 0.8263599872589111, + "eval_precision": 0.9001406800456653, + "eval_recall": 0.8999445676274944, + "eval_runtime": 8.0782, + "eval_samples_per_second": 446.635, + "eval_steps_per_second": 3.59, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.000289887582338138, + "loss": 0.7343, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0002799851511518284, + "loss": 0.7225, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8888580931263859, + "eval_f1": 0.892262887842489, + "eval_loss": 0.8483912348747253, + "eval_precision": 0.9007870145198834, + "eval_recall": 0.8888580931263859, + "eval_runtime": 7.4151, + "eval_samples_per_second": 486.572, + "eval_steps_per_second": 3.911, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00026945399540425556, + "loss": 0.7154, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.000258355455128915, + "loss": 0.7149, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.8913716629689546, + "eval_loss": 0.8356512188911438, + "eval_precision": 0.8888349983595902, + "eval_recall": 0.9024390243902439, + "eval_runtime": 8.0725, + "eval_samples_per_second": 446.948, + "eval_steps_per_second": 3.592, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0002467541751614317, + "loss": 0.716, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9046147830123301, + "eval_loss": 0.8269001245498657, + "eval_precision": 0.9082255172386686, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.982, + "eval_samples_per_second": 452.014, + "eval_steps_per_second": 3.633, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00023471772860763154, + "loss": 0.7123, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00022231622325553658, + "loss": 0.7117, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.9085224441891459, + "eval_loss": 0.8177808523178101, + "eval_precision": 0.9062066002075595, + "eval_recall": 0.9113082039911308, + "eval_runtime": 7.8494, + "eval_samples_per_second": 459.656, + "eval_steps_per_second": 3.695, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0002096218932237875, + "loss": 0.7054, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00019670867822498743, + "loss": 0.7117, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9016075388026608, + "eval_f1": 0.896962694654186, + "eval_loss": 0.8360165357589722, + "eval_precision": 0.8957397457160988, + "eval_recall": 0.9016075388026608, + "eval_runtime": 7.6561, + "eval_samples_per_second": 471.258, + "eval_steps_per_second": 3.788, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00018365179289459978, + "loss": 0.7047, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.901781957228825, + "eval_loss": 0.8226761817932129, + "eval_precision": 0.9071440802827015, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.7159, + "eval_samples_per_second": 467.607, + "eval_steps_per_second": 3.758, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00017052728869389942, + "loss": 0.7048, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00015741161093872646, + "loss": 0.701, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9085365853658537, + "eval_f1": 0.896764975630733, + "eval_loss": 0.8337616324424744, + "eval_precision": 0.8984191921102117, + "eval_recall": 0.9085365853658537, + "eval_runtime": 7.7107, + "eval_samples_per_second": 467.921, + "eval_steps_per_second": 3.761, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.0001443811535341854, + "loss": 0.7008, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00013151181400879235, + "loss": 0.6973, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9020413748945396, + "eval_loss": 0.8200169205665588, + "eval_precision": 0.9018709427639722, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.9098, + "eval_samples_per_second": 456.141, + "eval_steps_per_second": 3.666, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.0001188785514398297, + "loss": 0.6966, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9094789276982368, + "eval_loss": 0.8151785135269165, + "eval_precision": 0.9074342779532517, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.7446, + "eval_samples_per_second": 465.874, + "eval_steps_per_second": 3.745, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00010655494984482727, + "loss": 0.6953, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 9.461278958225152e-05, + "loss": 0.6932, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9123797089014785, + "eval_loss": 0.8112426996231079, + "eval_precision": 0.910060537408767, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.8945, + "eval_samples_per_second": 457.027, + "eval_steps_per_second": 3.673, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 8.312162925783138e-05, + "loss": 0.6905, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9067208945732566, + "eval_loss": 0.8171787261962891, + "eval_precision": 0.903844360120071, + "eval_recall": 0.9121396895787139, + "eval_runtime": 8.066, + "eval_samples_per_second": 447.311, + "eval_steps_per_second": 3.595, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 7.2148400571762e-05, + "loss": 0.6947, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 6.175701846664736e-05, + "loss": 0.6921, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9154153170991405, + "eval_loss": 0.8030020594596863, + "eval_precision": 0.9125617708072467, + "eval_recall": 0.9190687361419069, + "eval_runtime": 7.4174, + "eval_samples_per_second": 486.423, + "eval_steps_per_second": 3.91, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 5.200800884692487e-05, + "loss": 0.69, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 4.2958156038166964e-05, + "loss": 0.6878, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9137141042856662, + "eval_loss": 0.8066474199295044, + "eval_precision": 0.9129350835355605, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.349, + "eval_samples_per_second": 432.145, + "eval_steps_per_second": 3.473, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 3.466017203967616e-05, + "loss": 0.6902, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9129597537646226, + "eval_loss": 0.8081188797950745, + "eval_precision": 0.9115179393978867, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.9893, + "eval_samples_per_second": 451.604, + "eval_steps_per_second": 3.63, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 2.716238949685372e-05, + "loss": 0.6844, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 2.0508480181663194e-05, + "loss": 0.6855, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9221175166297118, + "eval_f1": 0.9182733671123653, + "eval_loss": 0.8042904138565063, + "eval_precision": 0.9166053258497058, + "eval_recall": 0.9221175166297118, + "eval_runtime": 7.9624, + "eval_samples_per_second": 453.127, + "eval_steps_per_second": 3.642, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.4737200620933608e-05, + "loss": 0.6886, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 9.88216635412263e-06, + "loss": 0.6869, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9144784355680313, + "eval_loss": 0.8081766963005066, + "eval_precision": 0.9117954781523269, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.1522, + "eval_samples_per_second": 442.578, + "eval_steps_per_second": 3.557, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 5.971656135403825e-06, + "loss": 0.6872, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9154760990663301, + "eval_loss": 0.8012545108795166, + "eval_precision": 0.9128092439274615, + "eval_recall": 0.9190687361419069, + "eval_runtime": 8.2123, + "eval_samples_per_second": 439.34, + "eval_steps_per_second": 3.531, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 3.028447220526815e-06, + "loss": 0.6887, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.0696826978430148e-06, + "loss": 0.688, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9232261640798226, + "eval_f1": 0.919707369689397, + "eval_loss": 0.8022695183753967, + "eval_precision": 0.9176418791088158, + "eval_recall": 0.9232261640798226, + "eval_runtime": 7.9087, + "eval_samples_per_second": 456.209, + "eval_steps_per_second": 3.667, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 1.0677163624398865e-07, + "loss": 0.6817, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9133753558391371, + "eval_loss": 0.8089249134063721, + "eval_precision": 0.9115599360078197, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.5213, + "eval_samples_per_second": 479.704, + "eval_steps_per_second": 3.856, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.3805565983260753, + "learning_rate": 0.0003440853117141252, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-h3xp0oba/checkpoint-630/training_args.bin b/run-h3xp0oba/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..59769f0a26cc3715ddd41e23fc561b3356d44cbd --- /dev/null +++ b/run-h3xp0oba/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca8def5c23cb34d73de5d8c3661f5a54654711eed266d5dddb7724299103b835 +size 4792 diff --git a/run-hgfebf69/checkpoint-595/model.safetensors b/run-hgfebf69/checkpoint-595/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad70c0908d454e69f33998d220d9f28b3b2b735b --- /dev/null +++ b/run-hgfebf69/checkpoint-595/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fde93d7cdbc68ce5793a335d278b2b442e788033b900448654548726b072367 +size 198025308 diff --git a/run-hgfebf69/checkpoint-595/optimizer.pt b/run-hgfebf69/checkpoint-595/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee06662bdfa26e81e58501b6a6942701fbcf4a02 --- /dev/null +++ b/run-hgfebf69/checkpoint-595/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:111f834b2d3d55b9463f181f48cd2bab6b6e04bd07380d16c66849774b581ba6 +size 395900602 diff --git a/run-hgfebf69/checkpoint-595/rng_state.pth b/run-hgfebf69/checkpoint-595/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b2798d3ef22ba33b35deea6a8c61abbb56099a6 --- /dev/null +++ b/run-hgfebf69/checkpoint-595/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d4b46d5b7fd917d05ccb48b8b2f6f0c7b9f5cfd5e53675d2f6391274fc4f7a5 +size 14244 diff --git a/run-hgfebf69/checkpoint-595/scheduler.pt b/run-hgfebf69/checkpoint-595/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b15044192f097175180c57797ea739dcb53e5cdd --- /dev/null +++ b/run-hgfebf69/checkpoint-595/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c5b4fa60116226c4bdfaa0346dcdcb7712da373d4eda7b17f64f8dccd33bbbe +size 1064 diff --git a/run-hgfebf69/checkpoint-595/trainer_state.json b/run-hgfebf69/checkpoint-595/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d464480667e6efa5e883778f0014239d7aa365f9 --- /dev/null +++ b/run-hgfebf69/checkpoint-595/trainer_state.json @@ -0,0 +1,634 @@ +{ + "best_metric": 0.7796974441892816, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-hgfebf69/checkpoint-595", + "epoch": 28.0, + "eval_steps": 500, + "global_step": 595, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.452508274337144e-07, + "loss": 1.5355, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.3932926829268293, + "eval_f1": 0.484929996783293, + "eval_loss": 1.5276610851287842, + "eval_precision": 0.6852684114434121, + "eval_recall": 0.3932926829268293, + "eval_runtime": 8.1255, + "eval_samples_per_second": 444.032, + "eval_steps_per_second": 3.569, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 2.905016548674288e-07, + "loss": 1.5334, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 4.357524823011432e-07, + "loss": 1.5242, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.4584257206208426, + "eval_f1": 0.5440289476561175, + "eval_loss": 1.5126081705093384, + "eval_precision": 0.6975641567116644, + "eval_recall": 0.4584257206208426, + "eval_runtime": 8.3776, + "eval_samples_per_second": 430.674, + "eval_steps_per_second": 3.462, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 5.810033097348576e-07, + "loss": 1.5104, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.5296563192904656, + "eval_f1": 0.5944990700609136, + "eval_loss": 1.4862231016159058, + "eval_precision": 0.6890568361070714, + "eval_recall": 0.5296563192904656, + "eval_runtime": 8.1023, + "eval_samples_per_second": 445.304, + "eval_steps_per_second": 3.579, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 7.262541371685721e-07, + "loss": 1.493, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 8.715049646022864e-07, + "loss": 1.4698, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6513303769401331, + "eval_f1": 0.6736289843952361, + "eval_loss": 1.4428884983062744, + "eval_precision": 0.7042447934146061, + "eval_recall": 0.6513303769401331, + "eval_runtime": 8.2686, + "eval_samples_per_second": 436.352, + "eval_steps_per_second": 3.507, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 1.0167557920360007e-06, + "loss": 1.4457, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 1.1620066194697152e-06, + "loss": 1.4126, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.7513858093126385, + "eval_f1": 0.7236423084979209, + "eval_loss": 1.3878988027572632, + "eval_precision": 0.704223393234371, + "eval_recall": 0.7513858093126385, + "eval_runtime": 8.0477, + "eval_samples_per_second": 448.328, + "eval_steps_per_second": 3.604, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 1.3072574469034296e-06, + "loss": 1.3745, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8009977827050998, + "eval_f1": 0.7446621793568696, + "eval_loss": 1.3229458332061768, + "eval_precision": 0.7198332904237893, + "eval_recall": 0.8009977827050998, + "eval_runtime": 8.341, + "eval_samples_per_second": 432.564, + "eval_steps_per_second": 3.477, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 1.4075969248184277e-06, + "loss": 1.3395, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 1.4038673596551303e-06, + "loss": 1.2941, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8245565410199557, + "eval_f1": 0.7506711729341005, + "eval_loss": 1.2522051334381104, + "eval_precision": 0.7225360812946281, + "eval_recall": 0.8245565410199557, + "eval_runtime": 8.1304, + "eval_samples_per_second": 443.768, + "eval_steps_per_second": 3.567, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 1.3955441156005382e-06, + "loss": 1.2525, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 1.3826818162583345e-06, + "loss": 1.2116, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8276053215077606, + "eval_f1": 0.7512743652414022, + "eval_loss": 1.1887985467910767, + "eval_precision": 0.7288159081513773, + "eval_recall": 0.8276053215077606, + "eval_runtime": 8.187, + "eval_samples_per_second": 440.697, + "eval_steps_per_second": 3.542, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 1.3653648740447739e-06, + "loss": 1.168, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7508179552480237, + "eval_loss": 1.1201469898223877, + "eval_precision": 0.6864978143631778, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.4146, + "eval_samples_per_second": 428.78, + "eval_steps_per_second": 3.446, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 1.343706936208727e-06, + "loss": 1.1394, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 1.3178501389900054e-06, + "loss": 1.1014, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.7513704434640557, + "eval_loss": 1.0765742063522339, + "eval_precision": 0.7699235349840203, + "eval_recall": 0.8287139689578714, + "eval_runtime": 8.0122, + "eval_samples_per_second": 450.311, + "eval_steps_per_second": 3.619, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 1.2879641748107618e-06, + "loss": 1.0706, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.0376129150390625, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.5615, + "eval_samples_per_second": 421.42, + "eval_steps_per_second": 3.387, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 1.254245178621782e-06, + "loss": 1.0439, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 1.2169144407123264e-06, + "loss": 1.0194, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7508179552480237, + "eval_loss": 1.0062154531478882, + "eval_precision": 0.6864978143631778, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.4048, + "eval_samples_per_second": 429.28, + "eval_steps_per_second": 3.45, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 1.1762169544310694e-06, + "loss": 1.0048, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 1.132419808349121e-06, + "loss": 0.9997, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7508179552480237, + "eval_loss": 0.9836404919624329, + "eval_precision": 0.6864978143631778, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.5535, + "eval_samples_per_second": 421.814, + "eval_steps_per_second": 3.39, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 1.0858104334170029e-06, + "loss": 0.9776, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9691689610481262, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.9567, + "eval_samples_per_second": 453.453, + "eval_steps_per_second": 3.645, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 1.0366947166191075e-06, + "loss": 0.9671, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 9.853949935052988e-07, + "loss": 0.9587, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8292682926829268, + "eval_f1": 0.7526922431410414, + "eval_loss": 0.9562509059906006, + "eval_precision": 0.7703043936882441, + "eval_recall": 0.8292682926829268, + "eval_runtime": 8.4186, + "eval_samples_per_second": 428.574, + "eval_steps_per_second": 3.445, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 9.32247932774231e-07, + "loss": 0.9463, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 8.776023267913875e-07, + "loss": 0.9492, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.83009977827051, + "eval_f1": 0.7547519055308186, + "eval_loss": 0.9446051120758057, + "eval_precision": 0.7591494201844647, + "eval_recall": 0.83009977827051, + "eval_runtime": 8.2197, + "eval_samples_per_second": 438.947, + "eval_steps_per_second": 3.528, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 8.218168025421926e-07, + "loss": 0.9437, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.83009977827051, + "eval_f1": 0.7553598948352633, + "eval_loss": 0.9370310306549072, + "eval_precision": 0.7528714352407055, + "eval_recall": 0.83009977827051, + "eval_runtime": 8.2035, + "eval_samples_per_second": 439.813, + "eval_steps_per_second": 3.535, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 7.652574680426876e-07, + "loss": 0.9358, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 7.082955096538664e-07, + "loss": 0.9363, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.8317627494456763, + "eval_f1": 0.758845429136837, + "eval_loss": 0.9373316168785095, + "eval_precision": 0.7519338830068296, + "eval_recall": 0.8317627494456763, + "eval_runtime": 8.2615, + "eval_samples_per_second": 436.724, + "eval_steps_per_second": 3.51, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 6.513047560679618e-07, + "loss": 0.9316, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 5.94659224953694e-07, + "loss": 0.9264, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.8323170731707317, + "eval_f1": 0.7606463034975902, + "eval_loss": 0.9292559027671814, + "eval_precision": 0.7526638484809335, + "eval_recall": 0.8323170731707317, + "eval_runtime": 8.115, + "eval_samples_per_second": 444.609, + "eval_steps_per_second": 3.574, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 5.387306683613124e-07, + "loss": 0.9316, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8328713968957872, + "eval_f1": 0.762008565582684, + "eval_loss": 0.9269818067550659, + "eval_precision": 0.7475944578147732, + "eval_recall": 0.8328713968957872, + "eval_runtime": 8.3826, + "eval_samples_per_second": 430.415, + "eval_steps_per_second": 3.46, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.838861329963851e-07, + "loss": 0.92, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 4.304855513737253e-07, + "loss": 0.9174, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.8350886917960089, + "eval_f1": 0.766268232245012, + "eval_loss": 0.9251817464828491, + "eval_precision": 0.7611039143133754, + "eval_recall": 0.8350886917960089, + "eval_runtime": 7.7339, + "eval_samples_per_second": 466.52, + "eval_steps_per_second": 3.75, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.7887937966016937e-07, + "loss": 0.9212, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.8364745011086474, + "eval_f1": 0.7701721753563872, + "eval_loss": 0.9216660261154175, + "eval_precision": 0.7861397608879442, + "eval_recall": 0.8364745011086474, + "eval_runtime": 8.2364, + "eval_samples_per_second": 438.057, + "eval_steps_per_second": 3.521, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 3.29406297708525e-07, + "loss": 0.9222, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.8239098637685624e-07, + "loss": 0.9142, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.8389689578713969, + "eval_f1": 0.7751934812972323, + "eval_loss": 0.9198487997055054, + "eval_precision": 0.7580660317760047, + "eval_recall": 0.8389689578713969, + "eval_runtime": 8.1907, + "eval_samples_per_second": 440.497, + "eval_steps_per_second": 3.541, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.3814199672006268e-07, + "loss": 0.9239, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.9694972503777733e-07, + "loss": 0.9178, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.8356430155210643, + "eval_f1": 0.7695758947830811, + "eval_loss": 0.9179719686508179, + "eval_precision": 0.7756305393423396, + "eval_recall": 0.8356430155210643, + "eval_runtime": 8.0754, + "eval_samples_per_second": 446.788, + "eval_steps_per_second": 3.591, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.5908450706789677e-07, + "loss": 0.9152, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.8395232815964523, + "eval_f1": 0.7765057889032959, + "eval_loss": 0.9176101088523865, + "eval_precision": 0.7933438992755063, + "eval_recall": 0.8395232815964523, + "eval_runtime": 8.0393, + "eval_samples_per_second": 448.793, + "eval_steps_per_second": 3.607, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.2479484383312357e-07, + "loss": 0.9204, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 9.430577078390059e-08, + "loss": 0.9096, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.8398004434589801, + "eval_f1": 0.7763506725004051, + "eval_loss": 0.9181774258613586, + "eval_precision": 0.7954486998022142, + "eval_recall": 0.8398004434589801, + "eval_runtime": 8.1901, + "eval_samples_per_second": 440.533, + "eval_steps_per_second": 3.541, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 6.781738094068847e-08, + "loss": 0.9157, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 4.550351172787507e-08, + "loss": 0.9211, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.8364745011086474, + "eval_f1": 0.7710155859925437, + "eval_loss": 0.9194357991218567, + "eval_precision": 0.7851756179427326, + "eval_recall": 0.8364745011086474, + "eval_runtime": 8.2806, + "eval_samples_per_second": 435.717, + "eval_steps_per_second": 3.502, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.751060411733735e-08, + "loss": 0.912, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.8409090909090909, + "eval_f1": 0.7796974441892816, + "eval_loss": 0.9164049625396729, + "eval_precision": 0.796046494839443, + "eval_recall": 0.8409090909090909, + "eval_runtime": 8.3541, + "eval_samples_per_second": 431.882, + "eval_steps_per_second": 3.471, + "step": 595 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.019838947875582387, + "learning_rate": 1.4078157120498472e-06, + "metric": "eval/loss", + "weight_decay": 0.024972382732841077 + } +} diff --git a/run-hgfebf69/checkpoint-595/training_args.bin b/run-hgfebf69/checkpoint-595/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3bd5311090bbd1c17a4655fee6822a125e769701 --- /dev/null +++ b/run-hgfebf69/checkpoint-595/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57057c84551af78939886e88dfc8438307ce767a240842b55059823a7910895f +size 4792 diff --git a/run-hgfebf69/checkpoint-630/model.safetensors b/run-hgfebf69/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c21bf3ff6a582a41c3c33960f6126fe6e9310271 --- /dev/null +++ b/run-hgfebf69/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f927e9f6529e59e5560750103ca7e7f2428061c820f9f7625fd7aa857fc9ce9c +size 198025308 diff --git a/run-hgfebf69/checkpoint-630/optimizer.pt b/run-hgfebf69/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6fcdc63bfcd8fb7983ddf75acb03e7b6231bdea --- /dev/null +++ b/run-hgfebf69/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e81d0ca89b1f6b1877a4515d7bf57e247538af265e3f74cbaf6929ab3ebf2f66 +size 395900602 diff --git a/run-hgfebf69/checkpoint-630/rng_state.pth b/run-hgfebf69/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-hgfebf69/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-hgfebf69/checkpoint-630/scheduler.pt b/run-hgfebf69/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..920fbe5f67f6af9273dc8f89cc05d38de0babf00 --- /dev/null +++ b/run-hgfebf69/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0518f7c1956b7ff07404887b0c560f56a80c6e94ac5a5cc2334b2bdf557d47e1 +size 1064 diff --git a/run-hgfebf69/checkpoint-630/trainer_state.json b/run-hgfebf69/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..29a361238c63f30a4d8a86d60cc1d6bbac71cabd --- /dev/null +++ b/run-hgfebf69/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.7796974441892816, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-hgfebf69/checkpoint-595", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.452508274337144e-07, + "loss": 1.5355, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.3932926829268293, + "eval_f1": 0.484929996783293, + "eval_loss": 1.5276610851287842, + "eval_precision": 0.6852684114434121, + "eval_recall": 0.3932926829268293, + "eval_runtime": 8.1255, + "eval_samples_per_second": 444.032, + "eval_steps_per_second": 3.569, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 2.905016548674288e-07, + "loss": 1.5334, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 4.357524823011432e-07, + "loss": 1.5242, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.4584257206208426, + "eval_f1": 0.5440289476561175, + "eval_loss": 1.5126081705093384, + "eval_precision": 0.6975641567116644, + "eval_recall": 0.4584257206208426, + "eval_runtime": 8.3776, + "eval_samples_per_second": 430.674, + "eval_steps_per_second": 3.462, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 5.810033097348576e-07, + "loss": 1.5104, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.5296563192904656, + "eval_f1": 0.5944990700609136, + "eval_loss": 1.4862231016159058, + "eval_precision": 0.6890568361070714, + "eval_recall": 0.5296563192904656, + "eval_runtime": 8.1023, + "eval_samples_per_second": 445.304, + "eval_steps_per_second": 3.579, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 7.262541371685721e-07, + "loss": 1.493, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 8.715049646022864e-07, + "loss": 1.4698, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6513303769401331, + "eval_f1": 0.6736289843952361, + "eval_loss": 1.4428884983062744, + "eval_precision": 0.7042447934146061, + "eval_recall": 0.6513303769401331, + "eval_runtime": 8.2686, + "eval_samples_per_second": 436.352, + "eval_steps_per_second": 3.507, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 1.0167557920360007e-06, + "loss": 1.4457, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 1.1620066194697152e-06, + "loss": 1.4126, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.7513858093126385, + "eval_f1": 0.7236423084979209, + "eval_loss": 1.3878988027572632, + "eval_precision": 0.704223393234371, + "eval_recall": 0.7513858093126385, + "eval_runtime": 8.0477, + "eval_samples_per_second": 448.328, + "eval_steps_per_second": 3.604, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 1.3072574469034296e-06, + "loss": 1.3745, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8009977827050998, + "eval_f1": 0.7446621793568696, + "eval_loss": 1.3229458332061768, + "eval_precision": 0.7198332904237893, + "eval_recall": 0.8009977827050998, + "eval_runtime": 8.341, + "eval_samples_per_second": 432.564, + "eval_steps_per_second": 3.477, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 1.4075969248184277e-06, + "loss": 1.3395, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 1.4038673596551303e-06, + "loss": 1.2941, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8245565410199557, + "eval_f1": 0.7506711729341005, + "eval_loss": 1.2522051334381104, + "eval_precision": 0.7225360812946281, + "eval_recall": 0.8245565410199557, + "eval_runtime": 8.1304, + "eval_samples_per_second": 443.768, + "eval_steps_per_second": 3.567, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 1.3955441156005382e-06, + "loss": 1.2525, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 1.3826818162583345e-06, + "loss": 1.2116, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8276053215077606, + "eval_f1": 0.7512743652414022, + "eval_loss": 1.1887985467910767, + "eval_precision": 0.7288159081513773, + "eval_recall": 0.8276053215077606, + "eval_runtime": 8.187, + "eval_samples_per_second": 440.697, + "eval_steps_per_second": 3.542, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 1.3653648740447739e-06, + "loss": 1.168, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7508179552480237, + "eval_loss": 1.1201469898223877, + "eval_precision": 0.6864978143631778, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.4146, + "eval_samples_per_second": 428.78, + "eval_steps_per_second": 3.446, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 1.343706936208727e-06, + "loss": 1.1394, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 1.3178501389900054e-06, + "loss": 1.1014, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.7513704434640557, + "eval_loss": 1.0765742063522339, + "eval_precision": 0.7699235349840203, + "eval_recall": 0.8287139689578714, + "eval_runtime": 8.0122, + "eval_samples_per_second": 450.311, + "eval_steps_per_second": 3.619, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 1.2879641748107618e-06, + "loss": 1.0706, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.0376129150390625, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.5615, + "eval_samples_per_second": 421.42, + "eval_steps_per_second": 3.387, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 1.254245178621782e-06, + "loss": 1.0439, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 1.2169144407123264e-06, + "loss": 1.0194, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7508179552480237, + "eval_loss": 1.0062154531478882, + "eval_precision": 0.6864978143631778, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.4048, + "eval_samples_per_second": 429.28, + "eval_steps_per_second": 3.45, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 1.1762169544310694e-06, + "loss": 1.0048, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 1.132419808349121e-06, + "loss": 0.9997, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7508179552480237, + "eval_loss": 0.9836404919624329, + "eval_precision": 0.6864978143631778, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.5535, + "eval_samples_per_second": 421.814, + "eval_steps_per_second": 3.39, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 1.0858104334170029e-06, + "loss": 0.9776, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9691689610481262, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.9567, + "eval_samples_per_second": 453.453, + "eval_steps_per_second": 3.645, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 1.0366947166191075e-06, + "loss": 0.9671, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 9.853949935052988e-07, + "loss": 0.9587, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8292682926829268, + "eval_f1": 0.7526922431410414, + "eval_loss": 0.9562509059906006, + "eval_precision": 0.7703043936882441, + "eval_recall": 0.8292682926829268, + "eval_runtime": 8.4186, + "eval_samples_per_second": 428.574, + "eval_steps_per_second": 3.445, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 9.32247932774231e-07, + "loss": 0.9463, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 8.776023267913875e-07, + "loss": 0.9492, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.83009977827051, + "eval_f1": 0.7547519055308186, + "eval_loss": 0.9446051120758057, + "eval_precision": 0.7591494201844647, + "eval_recall": 0.83009977827051, + "eval_runtime": 8.2197, + "eval_samples_per_second": 438.947, + "eval_steps_per_second": 3.528, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 8.218168025421926e-07, + "loss": 0.9437, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.83009977827051, + "eval_f1": 0.7553598948352633, + "eval_loss": 0.9370310306549072, + "eval_precision": 0.7528714352407055, + "eval_recall": 0.83009977827051, + "eval_runtime": 8.2035, + "eval_samples_per_second": 439.813, + "eval_steps_per_second": 3.535, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 7.652574680426876e-07, + "loss": 0.9358, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 7.082955096538664e-07, + "loss": 0.9363, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.8317627494456763, + "eval_f1": 0.758845429136837, + "eval_loss": 0.9373316168785095, + "eval_precision": 0.7519338830068296, + "eval_recall": 0.8317627494456763, + "eval_runtime": 8.2615, + "eval_samples_per_second": 436.724, + "eval_steps_per_second": 3.51, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 6.513047560679618e-07, + "loss": 0.9316, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 5.94659224953694e-07, + "loss": 0.9264, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.8323170731707317, + "eval_f1": 0.7606463034975902, + "eval_loss": 0.9292559027671814, + "eval_precision": 0.7526638484809335, + "eval_recall": 0.8323170731707317, + "eval_runtime": 8.115, + "eval_samples_per_second": 444.609, + "eval_steps_per_second": 3.574, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 5.387306683613124e-07, + "loss": 0.9316, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8328713968957872, + "eval_f1": 0.762008565582684, + "eval_loss": 0.9269818067550659, + "eval_precision": 0.7475944578147732, + "eval_recall": 0.8328713968957872, + "eval_runtime": 8.3826, + "eval_samples_per_second": 430.415, + "eval_steps_per_second": 3.46, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.838861329963851e-07, + "loss": 0.92, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 4.304855513737253e-07, + "loss": 0.9174, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.8350886917960089, + "eval_f1": 0.766268232245012, + "eval_loss": 0.9251817464828491, + "eval_precision": 0.7611039143133754, + "eval_recall": 0.8350886917960089, + "eval_runtime": 7.7339, + "eval_samples_per_second": 466.52, + "eval_steps_per_second": 3.75, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.7887937966016937e-07, + "loss": 0.9212, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.8364745011086474, + "eval_f1": 0.7701721753563872, + "eval_loss": 0.9216660261154175, + "eval_precision": 0.7861397608879442, + "eval_recall": 0.8364745011086474, + "eval_runtime": 8.2364, + "eval_samples_per_second": 438.057, + "eval_steps_per_second": 3.521, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 3.29406297708525e-07, + "loss": 0.9222, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.8239098637685624e-07, + "loss": 0.9142, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.8389689578713969, + "eval_f1": 0.7751934812972323, + "eval_loss": 0.9198487997055054, + "eval_precision": 0.7580660317760047, + "eval_recall": 0.8389689578713969, + "eval_runtime": 8.1907, + "eval_samples_per_second": 440.497, + "eval_steps_per_second": 3.541, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.3814199672006268e-07, + "loss": 0.9239, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.9694972503777733e-07, + "loss": 0.9178, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.8356430155210643, + "eval_f1": 0.7695758947830811, + "eval_loss": 0.9179719686508179, + "eval_precision": 0.7756305393423396, + "eval_recall": 0.8356430155210643, + "eval_runtime": 8.0754, + "eval_samples_per_second": 446.788, + "eval_steps_per_second": 3.591, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.5908450706789677e-07, + "loss": 0.9152, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.8395232815964523, + "eval_f1": 0.7765057889032959, + "eval_loss": 0.9176101088523865, + "eval_precision": 0.7933438992755063, + "eval_recall": 0.8395232815964523, + "eval_runtime": 8.0393, + "eval_samples_per_second": 448.793, + "eval_steps_per_second": 3.607, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.2479484383312357e-07, + "loss": 0.9204, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 9.430577078390059e-08, + "loss": 0.9096, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.8398004434589801, + "eval_f1": 0.7763506725004051, + "eval_loss": 0.9181774258613586, + "eval_precision": 0.7954486998022142, + "eval_recall": 0.8398004434589801, + "eval_runtime": 8.1901, + "eval_samples_per_second": 440.533, + "eval_steps_per_second": 3.541, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 6.781738094068847e-08, + "loss": 0.9157, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 4.550351172787507e-08, + "loss": 0.9211, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.8364745011086474, + "eval_f1": 0.7710155859925437, + "eval_loss": 0.9194357991218567, + "eval_precision": 0.7851756179427326, + "eval_recall": 0.8364745011086474, + "eval_runtime": 8.2806, + "eval_samples_per_second": 435.717, + "eval_steps_per_second": 3.502, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.751060411733735e-08, + "loss": 0.912, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.8409090909090909, + "eval_f1": 0.7796974441892816, + "eval_loss": 0.9164049625396729, + "eval_precision": 0.796046494839443, + "eval_recall": 0.8409090909090909, + "eval_runtime": 8.3541, + "eval_samples_per_second": 431.882, + "eval_steps_per_second": 3.471, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.3956741568839634e-08, + "loss": 0.913, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 4.930875074491611e-09, + "loss": 0.914, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.8389689578713969, + "eval_f1": 0.7752508972412454, + "eval_loss": 0.9149675369262695, + "eval_precision": 0.7936547610733803, + "eval_recall": 0.8389689578713969, + "eval_runtime": 8.2796, + "eval_samples_per_second": 435.772, + "eval_steps_per_second": 3.503, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 4.922393931268309e-10, + "loss": 0.9114, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.8375831485587583, + "eval_f1": 0.7730602366283935, + "eval_loss": 0.9165772795677185, + "eval_precision": 0.7516532489338807, + "eval_recall": 0.8375831485587583, + "eval_runtime": 8.3965, + "eval_samples_per_second": 429.703, + "eval_steps_per_second": 3.454, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.019838947875582387, + "learning_rate": 1.4078157120498472e-06, + "metric": "eval/loss", + "weight_decay": 0.024972382732841077 + } +} diff --git a/run-hgfebf69/checkpoint-630/training_args.bin b/run-hgfebf69/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3bd5311090bbd1c17a4655fee6822a125e769701 --- /dev/null +++ b/run-hgfebf69/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57057c84551af78939886e88dfc8438307ce767a240842b55059823a7910895f +size 4792 diff --git a/run-hopg4w4q/checkpoint-1260/model.safetensors b/run-hopg4w4q/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..787cf90c93fa81b454e067f1e4c3b397e56cc0d7 --- /dev/null +++ b/run-hopg4w4q/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2905c01d0c811b8223da2faf84a62cf2b2e879a27852d9fcbc2b4d00913fffca +size 198025308 diff --git a/run-hopg4w4q/checkpoint-1260/optimizer.pt b/run-hopg4w4q/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf4c09d2d505307cdc8aaa58ffcdfea1d82189f6 --- /dev/null +++ b/run-hopg4w4q/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:849680922b42270f6bcbc91a502b6780f0b0e177f6a131df036d14ec725af9ee +size 395900602 diff --git a/run-hopg4w4q/checkpoint-1260/rng_state.pth b/run-hopg4w4q/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-hopg4w4q/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-hopg4w4q/checkpoint-1260/scheduler.pt b/run-hopg4w4q/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..261e433c984f3050237b1e394b52383805852482 --- /dev/null +++ b/run-hopg4w4q/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4a64b5b922c9606d9c69a17f60dc06317eed414090c4e2cc680ea0b53af6ddc +size 1064 diff --git a/run-hopg4w4q/checkpoint-1260/trainer_state.json b/run-hopg4w4q/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b057afb489f810fb8ecfe58cf188b886915367c9 --- /dev/null +++ b/run-hopg4w4q/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9246119733924612, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-hopg4w4q/checkpoint-765", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.458124158585848e-05, + "loss": 1.3492, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8464523281596452, + "eval_loss": 0.9340153932571411, + "eval_runtime": 6.5825, + "eval_samples_per_second": 548.119, + "eval_steps_per_second": 8.659, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 4.916248317171696e-05, + "loss": 0.9435, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 7.374372475757545e-05, + "loss": 0.8488, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8230447173118591, + "eval_runtime": 7.0515, + "eval_samples_per_second": 511.663, + "eval_steps_per_second": 8.083, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 9.832496634343392e-05, + "loss": 0.8105, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.81849604845047, + "eval_runtime": 6.931, + "eval_samples_per_second": 520.561, + "eval_steps_per_second": 8.224, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00012290620792929243, + "loss": 0.7972, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001474874495151509, + "loss": 0.7827, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.8350496888160706, + "eval_runtime": 6.7721, + "eval_samples_per_second": 532.772, + "eval_steps_per_second": 8.417, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00017206869110100937, + "loss": 0.778, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00019664993268686784, + "loss": 0.7735, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.805936336517334, + "eval_runtime": 6.7361, + "eval_samples_per_second": 535.621, + "eval_steps_per_second": 8.462, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00022123117427272633, + "loss": 0.7677, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.905210643015521, + "eval_loss": 0.8267498016357422, + "eval_runtime": 6.5915, + "eval_samples_per_second": 547.373, + "eval_steps_per_second": 8.648, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00024581241585858486, + "loss": 0.7676, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0002703936574444433, + "loss": 0.7624, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8373059866962306, + "eval_loss": 0.9183188676834106, + "eval_runtime": 6.5915, + "eval_samples_per_second": 547.375, + "eval_steps_per_second": 8.648, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0002949748990303018, + "loss": 0.7659, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00029639455846884816, + "loss": 0.7616, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8306541019955654, + "eval_loss": 0.9400848746299744, + "eval_runtime": 6.6008, + "eval_samples_per_second": 546.597, + "eval_steps_per_second": 8.635, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0002948242160437527, + "loss": 0.7487, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8894124168514412, + "eval_loss": 0.8416470885276794, + "eval_runtime": 6.7539, + "eval_samples_per_second": 534.212, + "eval_steps_per_second": 8.44, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00029216316258908517, + "loss": 0.7581, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00028843122469068773, + "loss": 0.7542, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.891629711751663, + "eval_loss": 0.8516181111335754, + "eval_runtime": 7.0812, + "eval_samples_per_second": 509.517, + "eval_steps_per_second": 8.049, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00028365620772277065, + "loss": 0.7447, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9060421286031042, + "eval_loss": 0.8203222155570984, + "eval_runtime": 6.3879, + "eval_samples_per_second": 564.818, + "eval_steps_per_second": 8.923, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00027787368867971817, + "loss": 0.7404, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00027112675110424933, + "loss": 0.7316, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8316420316696167, + "eval_runtime": 6.8812, + "eval_samples_per_second": 524.33, + "eval_steps_per_second": 8.283, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00026346566408689203, + "loss": 0.7343, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0002549475077284343, + "loss": 0.7307, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.899390243902439, + "eval_loss": 0.8333638906478882, + "eval_runtime": 6.8576, + "eval_samples_per_second": 526.13, + "eval_steps_per_second": 8.312, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00024563574785590157, + "loss": 0.7256, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8205613493919373, + "eval_runtime": 6.8115, + "eval_samples_per_second": 529.694, + "eval_steps_per_second": 8.368, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0002355997631607031, + "loss": 0.7325, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.000224914328282079, + "loss": 0.7201, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.899390243902439, + "eval_loss": 0.8408406376838684, + "eval_runtime": 6.4881, + "eval_samples_per_second": 556.093, + "eval_steps_per_second": 8.785, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00021365905668721137, + "loss": 0.7209, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0002019178074989083, + "loss": 0.7144, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8243187665939331, + "eval_runtime": 6.8168, + "eval_samples_per_second": 529.283, + "eval_steps_per_second": 8.362, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0001897780606903808, + "loss": 0.7206, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8073376417160034, + "eval_runtime": 6.8902, + "eval_samples_per_second": 523.645, + "eval_steps_per_second": 8.273, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00017733026530232008, + "loss": 0.7093, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00016466716553848343, + "loss": 0.7116, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9246119733924612, + "eval_loss": 0.8021999001502991, + "eval_runtime": 6.8847, + "eval_samples_per_second": 524.063, + "eval_steps_per_second": 8.279, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00015188310976081614, + "loss": 0.7048, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00013907334753254812, + "loss": 0.7063, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9088137472283814, + "eval_loss": 0.8248281478881836, + "eval_runtime": 6.4644, + "eval_samples_per_second": 558.138, + "eval_steps_per_second": 8.818, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00012633331994675194, + "loss": 0.7092, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8134006261825562, + "eval_runtime": 6.9087, + "eval_samples_per_second": 522.241, + "eval_steps_per_second": 8.25, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00011375794852787857, + "loss": 0.6995, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.0001014409280044189, + "loss": 0.6991, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.81588214635849, + "eval_runtime": 6.5253, + "eval_samples_per_second": 552.926, + "eval_steps_per_second": 8.735, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 8.947402822199602e-05, + "loss": 0.6954, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8183504939079285, + "eval_runtime": 6.8009, + "eval_samples_per_second": 530.519, + "eval_steps_per_second": 8.381, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 7.794641039809167e-05, + "loss": 0.6978, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 6.694396281275579e-05, + "loss": 0.6969, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8201302886009216, + "eval_runtime": 6.488, + "eval_samples_per_second": 556.105, + "eval_steps_per_second": 8.785, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 5.6548660884837397e-05, + "loss": 0.6941, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 4.6837956401585856e-05, + "loss": 0.6915, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.8043418526649475, + "eval_runtime": 6.7233, + "eval_samples_per_second": 536.641, + "eval_steps_per_second": 8.478, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 3.788420045226195e-05, + "loss": 0.689, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.8132599592208862, + "eval_runtime": 6.8143, + "eval_samples_per_second": 529.474, + "eval_steps_per_second": 8.365, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 2.9754104365279124e-05, + "loss": 0.6881, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.2508242665245925e-05, + "loss": 0.6874, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8116953372955322, + "eval_runtime": 6.8725, + "eval_samples_per_second": 524.991, + "eval_steps_per_second": 8.294, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 1.6200601753204026e-05, + "loss": 0.6884, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.0878177672688533e-05, + "loss": 0.6903, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.8083385825157166, + "eval_runtime": 6.6191, + "eval_samples_per_second": 545.091, + "eval_steps_per_second": 8.611, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 6.5806259585164306e-06, + "loss": 0.6911, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9218403547671841, + "eval_loss": 0.8044778108596802, + "eval_runtime": 7.1008, + "eval_samples_per_second": 508.109, + "eval_steps_per_second": 8.027, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 3.3399661771579875e-06, + "loss": 0.6838, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.1803433600585415e-06, + "loss": 0.6834, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.810207188129425, + "eval_runtime": 6.5604, + "eval_samples_per_second": 549.97, + "eval_steps_per_second": 8.689, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 1.1784810738813411e-07, + "loss": 0.6879, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9210088691796009, + "eval_loss": 0.7981667518615723, + "eval_runtime": 6.9945, + "eval_samples_per_second": 515.832, + "eval_steps_per_second": 8.149, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0002968657637676755, + "metric": "eval/loss", + "warmup_ratio": 0.24878488060483964 + } +} diff --git a/run-hopg4w4q/checkpoint-1260/training_args.bin b/run-hopg4w4q/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2aff17527b3865db05bf823299ac2c83ae6696ae --- /dev/null +++ b/run-hopg4w4q/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbde8e42c008daa702bbebdc13a6abfb525128f6c5156dffe8ea4e3fed7ee53c +size 4792 diff --git a/run-hopg4w4q/checkpoint-765/model.safetensors b/run-hopg4w4q/checkpoint-765/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..435f3f94537aaef73cfe15c9ea94e42cbe2d00f9 --- /dev/null +++ b/run-hopg4w4q/checkpoint-765/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:667fc5dcefb09b5fbf6c5b7b0f24d59666c6f9eed14461be55a5a619e0fc3ba7 +size 198025308 diff --git a/run-hopg4w4q/checkpoint-765/optimizer.pt b/run-hopg4w4q/checkpoint-765/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6aa5aaf3376b140af7092517488eb679e1dd3721 --- /dev/null +++ b/run-hopg4w4q/checkpoint-765/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e24ee503eaedf2b19bcccb703709980fbfa40c4a09295c07ca2bc64f5a2ff8af +size 395900602 diff --git a/run-hopg4w4q/checkpoint-765/rng_state.pth b/run-hopg4w4q/checkpoint-765/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c05919ce58d505810a93e8ac429fe4615d6a44ed --- /dev/null +++ b/run-hopg4w4q/checkpoint-765/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61ee048ae8d503bba4f7ea1991aff7539d07068a3e16840fff5c975bba6eac2b +size 14244 diff --git a/run-hopg4w4q/checkpoint-765/scheduler.pt b/run-hopg4w4q/checkpoint-765/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8388311e4da1a3a510af527ae075f7f9cd4db35 --- /dev/null +++ b/run-hopg4w4q/checkpoint-765/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beb31c727c25b976cc035815e2900b0e402ed1a23319f9eae31b5ac2a7b84863 +size 1064 diff --git a/run-hopg4w4q/checkpoint-765/trainer_state.json b/run-hopg4w4q/checkpoint-765/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..011a47b4df0c7660ecdf26057739e5c4c902c850 --- /dev/null +++ b/run-hopg4w4q/checkpoint-765/trainer_state.json @@ -0,0 +1,363 @@ +{ + "best_metric": 0.9246119733924612, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-hopg4w4q/checkpoint-765", + "epoch": 18.0, + "eval_steps": 500, + "global_step": 765, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.458124158585848e-05, + "loss": 1.3492, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8464523281596452, + "eval_loss": 0.9340153932571411, + "eval_runtime": 6.5825, + "eval_samples_per_second": 548.119, + "eval_steps_per_second": 8.659, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 4.916248317171696e-05, + "loss": 0.9435, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 7.374372475757545e-05, + "loss": 0.8488, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8230447173118591, + "eval_runtime": 7.0515, + "eval_samples_per_second": 511.663, + "eval_steps_per_second": 8.083, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 9.832496634343392e-05, + "loss": 0.8105, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.81849604845047, + "eval_runtime": 6.931, + "eval_samples_per_second": 520.561, + "eval_steps_per_second": 8.224, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00012290620792929243, + "loss": 0.7972, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001474874495151509, + "loss": 0.7827, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.8350496888160706, + "eval_runtime": 6.7721, + "eval_samples_per_second": 532.772, + "eval_steps_per_second": 8.417, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00017206869110100937, + "loss": 0.778, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00019664993268686784, + "loss": 0.7735, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.805936336517334, + "eval_runtime": 6.7361, + "eval_samples_per_second": 535.621, + "eval_steps_per_second": 8.462, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00022123117427272633, + "loss": 0.7677, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.905210643015521, + "eval_loss": 0.8267498016357422, + "eval_runtime": 6.5915, + "eval_samples_per_second": 547.373, + "eval_steps_per_second": 8.648, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00024581241585858486, + "loss": 0.7676, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0002703936574444433, + "loss": 0.7624, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8373059866962306, + "eval_loss": 0.9183188676834106, + "eval_runtime": 6.5915, + "eval_samples_per_second": 547.375, + "eval_steps_per_second": 8.648, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0002949748990303018, + "loss": 0.7659, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00029639455846884816, + "loss": 0.7616, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8306541019955654, + "eval_loss": 0.9400848746299744, + "eval_runtime": 6.6008, + "eval_samples_per_second": 546.597, + "eval_steps_per_second": 8.635, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0002948242160437527, + "loss": 0.7487, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8894124168514412, + "eval_loss": 0.8416470885276794, + "eval_runtime": 6.7539, + "eval_samples_per_second": 534.212, + "eval_steps_per_second": 8.44, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00029216316258908517, + "loss": 0.7581, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00028843122469068773, + "loss": 0.7542, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.891629711751663, + "eval_loss": 0.8516181111335754, + "eval_runtime": 7.0812, + "eval_samples_per_second": 509.517, + "eval_steps_per_second": 8.049, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00028365620772277065, + "loss": 0.7447, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9060421286031042, + "eval_loss": 0.8203222155570984, + "eval_runtime": 6.3879, + "eval_samples_per_second": 564.818, + "eval_steps_per_second": 8.923, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00027787368867971817, + "loss": 0.7404, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00027112675110424933, + "loss": 0.7316, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8316420316696167, + "eval_runtime": 6.8812, + "eval_samples_per_second": 524.33, + "eval_steps_per_second": 8.283, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00026346566408689203, + "loss": 0.7343, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0002549475077284343, + "loss": 0.7307, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.899390243902439, + "eval_loss": 0.8333638906478882, + "eval_runtime": 6.8576, + "eval_samples_per_second": 526.13, + "eval_steps_per_second": 8.312, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00024563574785590157, + "loss": 0.7256, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8205613493919373, + "eval_runtime": 6.8115, + "eval_samples_per_second": 529.694, + "eval_steps_per_second": 8.368, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0002355997631607031, + "loss": 0.7325, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.000224914328282079, + "loss": 0.7201, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.899390243902439, + "eval_loss": 0.8408406376838684, + "eval_runtime": 6.4881, + "eval_samples_per_second": 556.093, + "eval_steps_per_second": 8.785, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00021365905668721137, + "loss": 0.7209, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0002019178074989083, + "loss": 0.7144, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8243187665939331, + "eval_runtime": 6.8168, + "eval_samples_per_second": 529.283, + "eval_steps_per_second": 8.362, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0001897780606903808, + "loss": 0.7206, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8073376417160034, + "eval_runtime": 6.8902, + "eval_samples_per_second": 523.645, + "eval_steps_per_second": 8.273, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00017733026530232008, + "loss": 0.7093, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00016466716553848343, + "loss": 0.7116, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9246119733924612, + "eval_loss": 0.8021999001502991, + "eval_runtime": 6.8847, + "eval_samples_per_second": 524.063, + "eval_steps_per_second": 8.279, + "step": 765 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0002968657637676755, + "metric": "eval/loss", + "warmup_ratio": 0.24878488060483964 + } +} diff --git a/run-hopg4w4q/checkpoint-765/training_args.bin b/run-hopg4w4q/checkpoint-765/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2aff17527b3865db05bf823299ac2c83ae6696ae --- /dev/null +++ b/run-hopg4w4q/checkpoint-765/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbde8e42c008daa702bbebdc13a6abfb525128f6c5156dffe8ea4e3fed7ee53c +size 4792 diff --git a/run-hy3njusa/checkpoint-573/model.safetensors b/run-hy3njusa/checkpoint-573/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aad16de207dad09896e1a464e7696896dcdbd692 --- /dev/null +++ b/run-hy3njusa/checkpoint-573/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a894da4a0a50428a122ab7c13dbe8b76be3895a10ca0ef9f6ec92f9fe344a5c +size 198025308 diff --git a/run-hy3njusa/checkpoint-573/optimizer.pt b/run-hy3njusa/checkpoint-573/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0ed8c834d4c3fff2801c5bfb39184df0264c0a3 --- /dev/null +++ b/run-hy3njusa/checkpoint-573/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd205fd160ee46464ce3dbd6269fb0db882b2cf393824a83f901be0eaf3d0eda +size 395900602 diff --git a/run-hy3njusa/checkpoint-573/rng_state.pth b/run-hy3njusa/checkpoint-573/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e8713f9caaf617efce0d3935618a93ad2f5f391 --- /dev/null +++ b/run-hy3njusa/checkpoint-573/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9102bb312b12c2313ea7585eb813beef5c548592778aaea4ab0516e14ecd38e5 +size 14244 diff --git a/run-hy3njusa/checkpoint-573/scheduler.pt b/run-hy3njusa/checkpoint-573/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..246ce2a1af3a1a869e6072f4f327e299ae34e66a --- /dev/null +++ b/run-hy3njusa/checkpoint-573/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7c9cb1e98e95396a0318dee427fcf7025478494733df87728132338c3746cbf +size 1064 diff --git a/run-hy3njusa/checkpoint-573/trainer_state.json b/run-hy3njusa/checkpoint-573/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3e5bc79f64a029e9f6107e3f67e7f47d170d8322 --- /dev/null +++ b/run-hy3njusa/checkpoint-573/trainer_state.json @@ -0,0 +1,616 @@ +{ + "best_metric": 0.9180576965714665, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-hy3njusa/checkpoint-573", + "epoch": 26.96470588235294, + "eval_steps": 500, + "global_step": 573, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.9675955531473363e-05, + "loss": 1.4561, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7508179552480237, + "eval_loss": 0.993218719959259, + "eval_precision": 0.6864978143631778, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.3121, + "eval_samples_per_second": 434.067, + "eval_steps_per_second": 3.489, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.935191106294673e-05, + "loss": 1.0825, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 5.9027866594420094e-05, + "loss": 0.9182, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8766629711751663, + "eval_f1": 0.8579695183792603, + "eval_loss": 0.9186062216758728, + "eval_precision": 0.8760737490663022, + "eval_recall": 0.8766629711751663, + "eval_runtime": 7.6521, + "eval_samples_per_second": 471.503, + "eval_steps_per_second": 3.79, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 7.870382212589345e-05, + "loss": 0.8442, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.8998442862536573, + "eval_loss": 0.8231635689735413, + "eval_precision": 0.8954955153438833, + "eval_recall": 0.9068736141906873, + "eval_runtime": 8.1944, + "eval_samples_per_second": 440.299, + "eval_steps_per_second": 3.539, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 9.837977765736683e-05, + "loss": 0.8167, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00011805573318884019, + "loss": 0.7938, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9068524452303454, + "eval_loss": 0.8222822546958923, + "eval_precision": 0.904148415157449, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.9705, + "eval_samples_per_second": 452.667, + "eval_steps_per_second": 3.638, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00013773168872031354, + "loss": 0.79, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001574076442517869, + "loss": 0.7792, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.905210643015521, + "eval_f1": 0.8990262904095675, + "eval_loss": 0.8174524903297424, + "eval_precision": 0.8964937316826013, + "eval_recall": 0.905210643015521, + "eval_runtime": 7.7409, + "eval_samples_per_second": 466.098, + "eval_steps_per_second": 3.746, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00017708359978326027, + "loss": 0.7624, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.9005481208821476, + "eval_loss": 0.8261497616767883, + "eval_precision": 0.9072683430421606, + "eval_recall": 0.8991130820399114, + "eval_runtime": 7.916, + "eval_samples_per_second": 455.788, + "eval_steps_per_second": 3.663, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00019067577781341796, + "loss": 0.7665, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001901705637667836, + "loss": 0.7562, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9038248337028825, + "eval_f1": 0.8905541655328297, + "eval_loss": 0.8214184641838074, + "eval_precision": 0.8939085198258114, + "eval_recall": 0.9038248337028825, + "eval_runtime": 8.0555, + "eval_samples_per_second": 447.894, + "eval_steps_per_second": 3.6, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00018904308117140568, + "loss": 0.7522, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001873007294453531, + "loss": 0.7487, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.5789911308203991, + "eval_f1": 0.6471080839132971, + "eval_loss": 1.3981355428695679, + "eval_precision": 0.8693480891955936, + "eval_recall": 0.5789911308203991, + "eval_runtime": 7.89, + "eval_samples_per_second": 457.288, + "eval_steps_per_second": 3.676, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00018495494325635118, + "loss": 0.745, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9001567182976254, + "eval_loss": 0.8206666707992554, + "eval_precision": 0.8998596628014268, + "eval_recall": 0.9110310421286031, + "eval_runtime": 8.2143, + "eval_samples_per_second": 439.235, + "eval_steps_per_second": 3.53, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00018202111747859482, + "loss": 0.7434, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0001785185061596055, + "loss": 0.734, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9060421286031042, + "eval_f1": 0.901449241546012, + "eval_loss": 0.817486047744751, + "eval_precision": 0.9006137733034257, + "eval_recall": 0.9060421286031042, + "eval_runtime": 7.6333, + "eval_samples_per_second": 472.666, + "eval_steps_per_second": 3.799, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00017447009616019016, + "loss": 0.7285, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9077422174483958, + "eval_loss": 0.8135940432548523, + "eval_precision": 0.9042759295255597, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.1354, + "eval_samples_per_second": 443.495, + "eval_steps_per_second": 3.565, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0001699024562967749, + "loss": 0.725, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0001648455629761586, + "loss": 0.7192, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9137527643033321, + "eval_loss": 0.8072056174278259, + "eval_precision": 0.9131935457210237, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.6222, + "eval_samples_per_second": 473.353, + "eval_steps_per_second": 3.805, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00015933260346700752, + "loss": 0.7213, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00015339975809917564, + "loss": 0.716, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.897450110864745, + "eval_f1": 0.8978917357300862, + "eval_loss": 0.833185613155365, + "eval_precision": 0.901861290686609, + "eval_recall": 0.897450110864745, + "eval_runtime": 7.8806, + "eval_samples_per_second": 457.832, + "eval_steps_per_second": 3.68, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0001470859628202287, + "loss": 0.7146, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9085365853658537, + "eval_f1": 0.8994216280683696, + "eval_loss": 0.8260117769241333, + "eval_precision": 0.9017612691498516, + "eval_recall": 0.9085365853658537, + "eval_runtime": 7.6314, + "eval_samples_per_second": 472.786, + "eval_steps_per_second": 3.8, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00014043265366746088, + "loss": 0.7091, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00013348349483237733, + "loss": 0.7092, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9057649667405765, + "eval_f1": 0.9033395181738206, + "eval_loss": 0.8203440308570862, + "eval_precision": 0.9051904985107363, + "eval_recall": 0.9057649667405765, + "eval_runtime": 7.7282, + "eval_samples_per_second": 466.859, + "eval_steps_per_second": 3.752, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00012628409210229495, + "loss": 0.7074, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00011888169355968069, + "loss": 0.7097, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9013303769401331, + "eval_f1": 0.8853180413618842, + "eval_loss": 0.8357746005058289, + "eval_precision": 0.8932841806647192, + "eval_recall": 0.9013303769401331, + "eval_runtime": 8.2087, + "eval_samples_per_second": 439.536, + "eval_steps_per_second": 3.533, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00011132487950347166, + "loss": 0.7028, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8968957871396895, + "eval_f1": 0.8988975133528163, + "eval_loss": 0.8364317417144775, + "eval_precision": 0.9106785149626856, + "eval_recall": 0.8968957871396895, + "eval_runtime": 8.1646, + "eval_samples_per_second": 441.91, + "eval_steps_per_second": 3.552, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00010366324362735352, + "loss": 0.706, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 9.594706754735459e-05, + "loss": 0.6998, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9141884402565421, + "eval_loss": 0.8083250522613525, + "eval_precision": 0.9121270350901496, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.1949, + "eval_samples_per_second": 440.276, + "eval_steps_per_second": 3.539, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 8.822699081475803e-05, + "loss": 0.7027, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 8.055367857996437e-05, + "loss": 0.6963, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9117698452667057, + "eval_loss": 0.8125285506248474, + "eval_precision": 0.9106519737343475, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.0051, + "eval_samples_per_second": 450.712, + "eval_steps_per_second": 3.623, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 7.29774890883528e-05, + "loss": 0.7034, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9097180863493133, + "eval_loss": 0.8079902529716492, + "eval_precision": 0.9063175575919257, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.283, + "eval_samples_per_second": 435.589, + "eval_steps_per_second": 3.501, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 6.554814319029183e-05, + "loss": 0.6951, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 5.831439803423231e-05, + "loss": 0.692, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9132752080012447, + "eval_loss": 0.8098239898681641, + "eval_precision": 0.9122434018954185, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.1293, + "eval_samples_per_second": 443.825, + "eval_steps_per_second": 3.567, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 5.132372708436192e-05, + "loss": 0.6924, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9045815669185122, + "eval_loss": 0.8159691691398621, + "eval_precision": 0.9042476058356282, + "eval_recall": 0.9115853658536586, + "eval_runtime": 8.1922, + "eval_samples_per_second": 440.418, + "eval_steps_per_second": 3.54, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 4.4622008562794666e-05, + "loss": 0.695, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 3.82532243609804e-05, + "loss": 0.6917, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9131894340304347, + "eval_loss": 0.8099046945571899, + "eval_precision": 0.9127025060621213, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.8703, + "eval_samples_per_second": 458.435, + "eval_steps_per_second": 3.685, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 3.2259171396311305e-05, + "loss": 0.6921, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 2.6679187308228308e-05, + "loss": 0.6919, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9163153010399909, + "eval_loss": 0.7974767088890076, + "eval_precision": 0.9150258112552463, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.9749, + "eval_samples_per_second": 452.419, + "eval_steps_per_second": 3.636, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 2.154989229402321e-05, + "loss": 0.6918, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9144835377175147, + "eval_loss": 0.805873692035675, + "eval_precision": 0.9132686336713862, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.777, + "eval_samples_per_second": 463.933, + "eval_steps_per_second": 3.729, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.690494877861028e-05, + "loss": 0.6892, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.2774840495501727e-05, + "loss": 0.6888, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9156469144293529, + "eval_loss": 0.7999334335327148, + "eval_precision": 0.9144100639649653, + "eval_recall": 0.9207317073170732, + "eval_runtime": 8.0379, + "eval_samples_per_second": 448.875, + "eval_steps_per_second": 3.608, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 9.186672428829501e-06, + "loss": 0.6882, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 6.16399292934927e-06, + "loss": 0.6857, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9221175166297118, + "eval_f1": 0.9180576965714665, + "eval_loss": 0.8053193688392639, + "eval_precision": 0.916258215964964, + "eval_recall": 0.9221175166297118, + "eval_runtime": 8.0843, + "eval_samples_per_second": 446.296, + "eval_steps_per_second": 3.587, + "step": 573 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.07674871854571184, + "learning_rate": 0.00019070541515120337, + "metric": "eval/loss", + "weight_decay": 0.14314374078467948 + } +} diff --git a/run-hy3njusa/checkpoint-573/training_args.bin b/run-hy3njusa/checkpoint-573/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dac5ff93aa6f746182b5de2f51d2aae84a4bc18b --- /dev/null +++ b/run-hy3njusa/checkpoint-573/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52d8cbe18c258d4f5cbcb4c9bf7f1e72fd5d825483f53a12a68bb0fc144cde76 +size 4792 diff --git a/run-hy3njusa/checkpoint-630/model.safetensors b/run-hy3njusa/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b185aac5a903dd6ee79d98075c05faffc7b60417 --- /dev/null +++ b/run-hy3njusa/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9376a56479b47dcee6fa3cb9e1b3b43b731aa2149fa0fb53bbd3f9792eab2bb3 +size 198025308 diff --git a/run-hy3njusa/checkpoint-630/optimizer.pt b/run-hy3njusa/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ecc314ee9d73a674355bd218e028541292c29a2 --- /dev/null +++ b/run-hy3njusa/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e36b2a19f5f5bf599cffad90f9fed00ce78d3b36706eeeaf701ff4fdc43ff131 +size 395900602 diff --git a/run-hy3njusa/checkpoint-630/rng_state.pth b/run-hy3njusa/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-hy3njusa/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-hy3njusa/checkpoint-630/scheduler.pt b/run-hy3njusa/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..16cf1e99c974349495202d86acb066541b769e61 --- /dev/null +++ b/run-hy3njusa/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1502b24026c5550b575a92683d562cbf20b5f96c81a7ed998dc2d7f4425a8c94 +size 1064 diff --git a/run-hy3njusa/checkpoint-630/trainer_state.json b/run-hy3njusa/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..503d796c52a03bc59afb166c76278532fa7fe985 --- /dev/null +++ b/run-hy3njusa/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9180576965714665, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-hy3njusa/checkpoint-573", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.9675955531473363e-05, + "loss": 1.4561, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7508179552480237, + "eval_loss": 0.993218719959259, + "eval_precision": 0.6864978143631778, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.3121, + "eval_samples_per_second": 434.067, + "eval_steps_per_second": 3.489, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.935191106294673e-05, + "loss": 1.0825, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 5.9027866594420094e-05, + "loss": 0.9182, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8766629711751663, + "eval_f1": 0.8579695183792603, + "eval_loss": 0.9186062216758728, + "eval_precision": 0.8760737490663022, + "eval_recall": 0.8766629711751663, + "eval_runtime": 7.6521, + "eval_samples_per_second": 471.503, + "eval_steps_per_second": 3.79, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 7.870382212589345e-05, + "loss": 0.8442, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.8998442862536573, + "eval_loss": 0.8231635689735413, + "eval_precision": 0.8954955153438833, + "eval_recall": 0.9068736141906873, + "eval_runtime": 8.1944, + "eval_samples_per_second": 440.299, + "eval_steps_per_second": 3.539, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 9.837977765736683e-05, + "loss": 0.8167, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00011805573318884019, + "loss": 0.7938, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9068524452303454, + "eval_loss": 0.8222822546958923, + "eval_precision": 0.904148415157449, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.9705, + "eval_samples_per_second": 452.667, + "eval_steps_per_second": 3.638, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00013773168872031354, + "loss": 0.79, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001574076442517869, + "loss": 0.7792, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.905210643015521, + "eval_f1": 0.8990262904095675, + "eval_loss": 0.8174524903297424, + "eval_precision": 0.8964937316826013, + "eval_recall": 0.905210643015521, + "eval_runtime": 7.7409, + "eval_samples_per_second": 466.098, + "eval_steps_per_second": 3.746, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00017708359978326027, + "loss": 0.7624, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.9005481208821476, + "eval_loss": 0.8261497616767883, + "eval_precision": 0.9072683430421606, + "eval_recall": 0.8991130820399114, + "eval_runtime": 7.916, + "eval_samples_per_second": 455.788, + "eval_steps_per_second": 3.663, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00019067577781341796, + "loss": 0.7665, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001901705637667836, + "loss": 0.7562, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9038248337028825, + "eval_f1": 0.8905541655328297, + "eval_loss": 0.8214184641838074, + "eval_precision": 0.8939085198258114, + "eval_recall": 0.9038248337028825, + "eval_runtime": 8.0555, + "eval_samples_per_second": 447.894, + "eval_steps_per_second": 3.6, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00018904308117140568, + "loss": 0.7522, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001873007294453531, + "loss": 0.7487, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.5789911308203991, + "eval_f1": 0.6471080839132971, + "eval_loss": 1.3981355428695679, + "eval_precision": 0.8693480891955936, + "eval_recall": 0.5789911308203991, + "eval_runtime": 7.89, + "eval_samples_per_second": 457.288, + "eval_steps_per_second": 3.676, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00018495494325635118, + "loss": 0.745, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9001567182976254, + "eval_loss": 0.8206666707992554, + "eval_precision": 0.8998596628014268, + "eval_recall": 0.9110310421286031, + "eval_runtime": 8.2143, + "eval_samples_per_second": 439.235, + "eval_steps_per_second": 3.53, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00018202111747859482, + "loss": 0.7434, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0001785185061596055, + "loss": 0.734, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9060421286031042, + "eval_f1": 0.901449241546012, + "eval_loss": 0.817486047744751, + "eval_precision": 0.9006137733034257, + "eval_recall": 0.9060421286031042, + "eval_runtime": 7.6333, + "eval_samples_per_second": 472.666, + "eval_steps_per_second": 3.799, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00017447009616019016, + "loss": 0.7285, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9077422174483958, + "eval_loss": 0.8135940432548523, + "eval_precision": 0.9042759295255597, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.1354, + "eval_samples_per_second": 443.495, + "eval_steps_per_second": 3.565, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0001699024562967749, + "loss": 0.725, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0001648455629761586, + "loss": 0.7192, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9137527643033321, + "eval_loss": 0.8072056174278259, + "eval_precision": 0.9131935457210237, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.6222, + "eval_samples_per_second": 473.353, + "eval_steps_per_second": 3.805, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00015933260346700752, + "loss": 0.7213, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00015339975809917564, + "loss": 0.716, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.897450110864745, + "eval_f1": 0.8978917357300862, + "eval_loss": 0.833185613155365, + "eval_precision": 0.901861290686609, + "eval_recall": 0.897450110864745, + "eval_runtime": 7.8806, + "eval_samples_per_second": 457.832, + "eval_steps_per_second": 3.68, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0001470859628202287, + "loss": 0.7146, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9085365853658537, + "eval_f1": 0.8994216280683696, + "eval_loss": 0.8260117769241333, + "eval_precision": 0.9017612691498516, + "eval_recall": 0.9085365853658537, + "eval_runtime": 7.6314, + "eval_samples_per_second": 472.786, + "eval_steps_per_second": 3.8, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00014043265366746088, + "loss": 0.7091, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00013348349483237733, + "loss": 0.7092, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9057649667405765, + "eval_f1": 0.9033395181738206, + "eval_loss": 0.8203440308570862, + "eval_precision": 0.9051904985107363, + "eval_recall": 0.9057649667405765, + "eval_runtime": 7.7282, + "eval_samples_per_second": 466.859, + "eval_steps_per_second": 3.752, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00012628409210229495, + "loss": 0.7074, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00011888169355968069, + "loss": 0.7097, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9013303769401331, + "eval_f1": 0.8853180413618842, + "eval_loss": 0.8357746005058289, + "eval_precision": 0.8932841806647192, + "eval_recall": 0.9013303769401331, + "eval_runtime": 8.2087, + "eval_samples_per_second": 439.536, + "eval_steps_per_second": 3.533, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00011132487950347166, + "loss": 0.7028, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8968957871396895, + "eval_f1": 0.8988975133528163, + "eval_loss": 0.8364317417144775, + "eval_precision": 0.9106785149626856, + "eval_recall": 0.8968957871396895, + "eval_runtime": 8.1646, + "eval_samples_per_second": 441.91, + "eval_steps_per_second": 3.552, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00010366324362735352, + "loss": 0.706, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 9.594706754735459e-05, + "loss": 0.6998, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9141884402565421, + "eval_loss": 0.8083250522613525, + "eval_precision": 0.9121270350901496, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.1949, + "eval_samples_per_second": 440.276, + "eval_steps_per_second": 3.539, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 8.822699081475803e-05, + "loss": 0.7027, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 8.055367857996437e-05, + "loss": 0.6963, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9117698452667057, + "eval_loss": 0.8125285506248474, + "eval_precision": 0.9106519737343475, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.0051, + "eval_samples_per_second": 450.712, + "eval_steps_per_second": 3.623, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 7.29774890883528e-05, + "loss": 0.7034, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9097180863493133, + "eval_loss": 0.8079902529716492, + "eval_precision": 0.9063175575919257, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.283, + "eval_samples_per_second": 435.589, + "eval_steps_per_second": 3.501, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 6.554814319029183e-05, + "loss": 0.6951, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 5.831439803423231e-05, + "loss": 0.692, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9132752080012447, + "eval_loss": 0.8098239898681641, + "eval_precision": 0.9122434018954185, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.1293, + "eval_samples_per_second": 443.825, + "eval_steps_per_second": 3.567, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 5.132372708436192e-05, + "loss": 0.6924, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9045815669185122, + "eval_loss": 0.8159691691398621, + "eval_precision": 0.9042476058356282, + "eval_recall": 0.9115853658536586, + "eval_runtime": 8.1922, + "eval_samples_per_second": 440.418, + "eval_steps_per_second": 3.54, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 4.4622008562794666e-05, + "loss": 0.695, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 3.82532243609804e-05, + "loss": 0.6917, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9131894340304347, + "eval_loss": 0.8099046945571899, + "eval_precision": 0.9127025060621213, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.8703, + "eval_samples_per_second": 458.435, + "eval_steps_per_second": 3.685, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 3.2259171396311305e-05, + "loss": 0.6921, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 2.6679187308228308e-05, + "loss": 0.6919, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9163153010399909, + "eval_loss": 0.7974767088890076, + "eval_precision": 0.9150258112552463, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.9749, + "eval_samples_per_second": 452.419, + "eval_steps_per_second": 3.636, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 2.154989229402321e-05, + "loss": 0.6918, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9144835377175147, + "eval_loss": 0.805873692035675, + "eval_precision": 0.9132686336713862, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.777, + "eval_samples_per_second": 463.933, + "eval_steps_per_second": 3.729, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.690494877861028e-05, + "loss": 0.6892, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.2774840495501727e-05, + "loss": 0.6888, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9156469144293529, + "eval_loss": 0.7999334335327148, + "eval_precision": 0.9144100639649653, + "eval_recall": 0.9207317073170732, + "eval_runtime": 8.0379, + "eval_samples_per_second": 448.875, + "eval_steps_per_second": 3.608, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 9.186672428829501e-06, + "loss": 0.6882, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 6.16399292934927e-06, + "loss": 0.6857, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9221175166297118, + "eval_f1": 0.9180576965714665, + "eval_loss": 0.8053193688392639, + "eval_precision": 0.916258215964964, + "eval_recall": 0.9221175166297118, + "eval_runtime": 8.0843, + "eval_samples_per_second": 446.296, + "eval_steps_per_second": 3.587, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 3.7266391718403138e-06, + "loss": 0.6881, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9104114592399689, + "eval_loss": 0.8099650740623474, + "eval_precision": 0.9090475621597922, + "eval_recall": 0.9157427937915743, + "eval_runtime": 8.1208, + "eval_samples_per_second": 444.29, + "eval_steps_per_second": 3.571, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.8906069681294893e-06, + "loss": 0.6877, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 6.679457901279258e-07, + "loss": 0.6865, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9159863544189508, + "eval_loss": 0.8017330169677734, + "eval_precision": 0.9137961278157899, + "eval_recall": 0.9210088691796009, + "eval_runtime": 7.9835, + "eval_samples_per_second": 451.935, + "eval_steps_per_second": 3.633, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 6.667969182084601e-08, + "loss": 0.6885, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9139937408780956, + "eval_loss": 0.8057049512863159, + "eval_precision": 0.9128103872170872, + "eval_recall": 0.9190687361419069, + "eval_runtime": 8.2673, + "eval_samples_per_second": 436.417, + "eval_steps_per_second": 3.508, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.07674871854571184, + "learning_rate": 0.00019070541515120337, + "metric": "eval/loss", + "weight_decay": 0.14314374078467948 + } +} diff --git a/run-hy3njusa/checkpoint-630/training_args.bin b/run-hy3njusa/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dac5ff93aa6f746182b5de2f51d2aae84a4bc18b --- /dev/null +++ b/run-hy3njusa/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52d8cbe18c258d4f5cbcb4c9bf7f1e72fd5d825483f53a12a68bb0fc144cde76 +size 4792 diff --git a/run-hzofpzk0/checkpoint-1260/model.safetensors b/run-hzofpzk0/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8813a8a70fb0233960b03de846217aa70b9d681 --- /dev/null +++ b/run-hzofpzk0/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:192d9182d5b416050eade53eae834e34321f667b495457de8a29819d410c61f2 +size 198025308 diff --git a/run-hzofpzk0/checkpoint-1260/optimizer.pt b/run-hzofpzk0/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7791db87c5c128200a22f32c5d09c41fd4495c8 --- /dev/null +++ b/run-hzofpzk0/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f99fc28a53d6dde1e6a2515ca2baa62854988d5bf8e41063849a857f190abbf +size 395900602 diff --git a/run-hzofpzk0/checkpoint-1260/rng_state.pth b/run-hzofpzk0/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-hzofpzk0/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-hzofpzk0/checkpoint-1260/scheduler.pt b/run-hzofpzk0/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..af8ffc5699ed29ad07e72e0c16d21c33e81f415b --- /dev/null +++ b/run-hzofpzk0/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75dad33ee51226caec135060bd239e4c805ca63e68b5195bac176851f3dd5153 +size 1064 diff --git a/run-hzofpzk0/checkpoint-1260/trainer_state.json b/run-hzofpzk0/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..11b2b5cbb6e5042123eb42d12b1d3e8af58fa79e --- /dev/null +++ b/run-hzofpzk0/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9254434589800443, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-hzofpzk0/checkpoint-765", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.0121032438416355e-05, + "loss": 1.3186, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.864190687361419, + "eval_loss": 0.9389198422431946, + "eval_runtime": 6.7604, + "eval_samples_per_second": 533.694, + "eval_steps_per_second": 8.431, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 6.024206487683271e-05, + "loss": 0.9285, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 9.036309731524907e-05, + "loss": 0.8344, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.816893458366394, + "eval_runtime": 6.8915, + "eval_samples_per_second": 523.547, + "eval_steps_per_second": 8.271, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00012048412975366542, + "loss": 0.8092, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9016075388026608, + "eval_loss": 0.8352723121643066, + "eval_runtime": 6.9771, + "eval_samples_per_second": 517.123, + "eval_steps_per_second": 8.17, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00014712711325943353, + "loss": 0.7957, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00014689195136116453, + "loss": 0.7793, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8982815964523282, + "eval_loss": 0.8356844782829285, + "eval_runtime": 6.9098, + "eval_samples_per_second": 522.157, + "eval_steps_per_second": 8.249, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0001462758436945397, + "loss": 0.7757, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00014528199103441036, + "loss": 0.7669, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8013181090354919, + "eval_runtime": 7.0508, + "eval_samples_per_second": 511.717, + "eval_steps_per_second": 8.084, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00014391555659943575, + "loss": 0.7604, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8169719576835632, + "eval_runtime": 6.925, + "eval_samples_per_second": 521.011, + "eval_steps_per_second": 8.231, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00014218363922836167, + "loss": 0.752, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00014009523650045246, + "loss": 0.7457, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9016075388026608, + "eval_loss": 0.8311586380004883, + "eval_runtime": 6.7133, + "eval_samples_per_second": 537.444, + "eval_steps_per_second": 8.491, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00013766119799167185, + "loss": 0.7455, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00013489416890945355, + "loss": 0.7393, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.8045174479484558, + "eval_runtime": 6.7979, + "eval_samples_per_second": 530.749, + "eval_steps_per_second": 8.385, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00013180852439888816, + "loss": 0.7307, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8253883123397827, + "eval_runtime": 6.8442, + "eval_samples_per_second": 527.163, + "eval_steps_per_second": 8.328, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0001284202948616156, + "loss": 0.7337, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00012474708267540326, + "loss": 0.7261, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.807209849357605, + "eval_runtime": 6.6224, + "eval_samples_per_second": 544.819, + "eval_steps_per_second": 8.607, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00012080797074706431, + "loss": 0.7229, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8088923096656799, + "eval_runtime": 6.8062, + "eval_samples_per_second": 530.104, + "eval_steps_per_second": 8.375, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00011662342337379818, + "loss": 0.7191, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00011221517992799368, + "loss": 0.7183, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8007187247276306, + "eval_runtime": 6.2665, + "eval_samples_per_second": 575.759, + "eval_steps_per_second": 9.096, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00010760614191781908, + "loss": 0.7166, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00010282025401033622, + "loss": 0.7132, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8111060857772827, + "eval_runtime": 6.6698, + "eval_samples_per_second": 540.943, + "eval_steps_per_second": 8.546, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 9.78823796352429e-05, + "loss": 0.71, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.8086754679679871, + "eval_runtime": 6.8025, + "eval_samples_per_second": 530.393, + "eval_steps_per_second": 8.379, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 9.281817181550047e-05, + "loss": 0.7141, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 8.765393989590249e-05, + "loss": 0.7054, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9118625277161863, + "eval_loss": 0.811958372592926, + "eval_runtime": 6.6182, + "eval_samples_per_second": 545.167, + "eval_steps_per_second": 8.613, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 8.241651286194919e-05, + "loss": 0.7086, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 7.713309995910833e-05, + "loss": 0.7018, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9101995565410199, + "eval_loss": 0.8114797472953796, + "eval_runtime": 6.7814, + "eval_samples_per_second": 532.042, + "eval_steps_per_second": 8.405, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 7.183114933656632e-05, + "loss": 0.7036, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8080512881278992, + "eval_runtime": 6.6797, + "eval_samples_per_second": 540.146, + "eval_steps_per_second": 8.533, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 6.653820544983747e-05, + "loss": 0.6973, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 6.12817659630473e-05, + "loss": 0.6997, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9254434589800443, + "eval_loss": 0.8000662922859192, + "eval_runtime": 6.4983, + "eval_samples_per_second": 555.221, + "eval_steps_per_second": 8.772, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 5.608913889430369e-05, + "loss": 0.6983, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 5.098730074630927e-05, + "loss": 0.6981, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8095241785049438, + "eval_runtime": 6.536, + "eval_samples_per_second": 552.016, + "eval_steps_per_second": 8.721, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 4.600275635924706e-05, + "loss": 0.7022, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9229490022172949, + "eval_loss": 0.7963194847106934, + "eval_runtime": 6.8456, + "eval_samples_per_second": 527.053, + "eval_steps_per_second": 8.326, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 4.1161401214026535e-05, + "loss": 0.6946, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 3.6488386901245336e-05, + "loss": 0.694, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8088030219078064, + "eval_runtime": 6.8266, + "eval_samples_per_second": 528.521, + "eval_steps_per_second": 8.35, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 3.200799045477641e-05, + "loss": 0.6915, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8104172348976135, + "eval_runtime": 6.7158, + "eval_samples_per_second": 537.239, + "eval_steps_per_second": 8.487, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 2.774348822881325e-05, + "loss": 0.6952, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 2.371703497360093e-05, + "loss": 0.6912, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.8036364912986755, + "eval_runtime": 6.8282, + "eval_samples_per_second": 528.397, + "eval_steps_per_second": 8.348, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 1.9949548738073628e-05, + "loss": 0.6919, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 1.646060219734724e-05, + "loss": 0.6919, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8026348948478699, + "eval_runtime": 6.804, + "eval_samples_per_second": 530.277, + "eval_steps_per_second": 8.377, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.3268320969637544e-05, + "loss": 0.6881, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9218403547671841, + "eval_loss": 0.8044987320899963, + "eval_runtime": 6.6269, + "eval_samples_per_second": 544.445, + "eval_steps_per_second": 8.601, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.038928945086333e-05, + "loss": 0.6883, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 7.838464656138292e-06, + "loss": 0.6867, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8069139719009399, + "eval_runtime": 6.7979, + "eval_samples_per_second": 530.752, + "eval_steps_per_second": 8.385, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 5.629098515758495e-06, + "loss": 0.6875, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.7726690293700006e-06, + "loss": 0.6938, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9243348115299335, + "eval_loss": 0.7973284721374512, + "eval_runtime": 6.6789, + "eval_samples_per_second": 540.206, + "eval_steps_per_second": 8.534, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.2788206359809375e-06, + "loss": 0.6897, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9240576496674058, + "eval_loss": 0.7977913618087769, + "eval_runtime": 6.8017, + "eval_samples_per_second": 530.457, + "eval_steps_per_second": 8.38, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.1553141096056957e-06, + "loss": 0.685, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 4.079862408406114e-07, + "loss": 0.6853, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8057315945625305, + "eval_runtime": 6.9258, + "eval_samples_per_second": 520.952, + "eval_steps_per_second": 8.23, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 4.071951383185761e-08, + "loss": 0.6875, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9246119733924612, + "eval_loss": 0.7974627017974854, + "eval_runtime": 6.5785, + "eval_samples_per_second": 548.451, + "eval_steps_per_second": 8.665, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00014712965844918758, + "metric": "eval/loss", + "warmup_ratio": 0.10038289629174348 + } +} diff --git a/run-hzofpzk0/checkpoint-1260/training_args.bin b/run-hzofpzk0/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8089d09d6bfb1731d89f9aa1be2d2e3d3d180c24 --- /dev/null +++ b/run-hzofpzk0/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e64931b89d3dace740454cf3cf3e5135fb82c98dfa675fe4591df106811ea892 +size 4792 diff --git a/run-hzofpzk0/checkpoint-765/model.safetensors b/run-hzofpzk0/checkpoint-765/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..64555a36f60266330aa328f6e9b07225a7b4f738 --- /dev/null +++ b/run-hzofpzk0/checkpoint-765/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:264b29e5eee6f197309fd3491d08d9c52a38d91c6b8af5d3069739909b7d7c5d +size 198025308 diff --git a/run-hzofpzk0/checkpoint-765/optimizer.pt b/run-hzofpzk0/checkpoint-765/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e793832867542b3ea5838076fc97a96b8a71235 --- /dev/null +++ b/run-hzofpzk0/checkpoint-765/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:933737db79aaac147a722d5b93f3586759928d7e37b14fce662018a85751cefe +size 395900602 diff --git a/run-hzofpzk0/checkpoint-765/rng_state.pth b/run-hzofpzk0/checkpoint-765/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c05919ce58d505810a93e8ac429fe4615d6a44ed --- /dev/null +++ b/run-hzofpzk0/checkpoint-765/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61ee048ae8d503bba4f7ea1991aff7539d07068a3e16840fff5c975bba6eac2b +size 14244 diff --git a/run-hzofpzk0/checkpoint-765/scheduler.pt b/run-hzofpzk0/checkpoint-765/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..500ccc759d2080e63fb790c057011b1e003380ad --- /dev/null +++ b/run-hzofpzk0/checkpoint-765/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9923099f8942b22a4254ef5ee0e198b94cca9f7c7d3b24f71b9b66c764a2eae3 +size 1064 diff --git a/run-hzofpzk0/checkpoint-765/trainer_state.json b/run-hzofpzk0/checkpoint-765/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..391d965a64f2a0ec5626da9eaff6fdb86dd8ffed --- /dev/null +++ b/run-hzofpzk0/checkpoint-765/trainer_state.json @@ -0,0 +1,363 @@ +{ + "best_metric": 0.9254434589800443, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-hzofpzk0/checkpoint-765", + "epoch": 18.0, + "eval_steps": 500, + "global_step": 765, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.0121032438416355e-05, + "loss": 1.3186, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.864190687361419, + "eval_loss": 0.9389198422431946, + "eval_runtime": 6.7604, + "eval_samples_per_second": 533.694, + "eval_steps_per_second": 8.431, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 6.024206487683271e-05, + "loss": 0.9285, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 9.036309731524907e-05, + "loss": 0.8344, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.816893458366394, + "eval_runtime": 6.8915, + "eval_samples_per_second": 523.547, + "eval_steps_per_second": 8.271, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00012048412975366542, + "loss": 0.8092, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9016075388026608, + "eval_loss": 0.8352723121643066, + "eval_runtime": 6.9771, + "eval_samples_per_second": 517.123, + "eval_steps_per_second": 8.17, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00014712711325943353, + "loss": 0.7957, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00014689195136116453, + "loss": 0.7793, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8982815964523282, + "eval_loss": 0.8356844782829285, + "eval_runtime": 6.9098, + "eval_samples_per_second": 522.157, + "eval_steps_per_second": 8.249, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0001462758436945397, + "loss": 0.7757, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00014528199103441036, + "loss": 0.7669, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8013181090354919, + "eval_runtime": 7.0508, + "eval_samples_per_second": 511.717, + "eval_steps_per_second": 8.084, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00014391555659943575, + "loss": 0.7604, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8169719576835632, + "eval_runtime": 6.925, + "eval_samples_per_second": 521.011, + "eval_steps_per_second": 8.231, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00014218363922836167, + "loss": 0.752, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00014009523650045246, + "loss": 0.7457, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9016075388026608, + "eval_loss": 0.8311586380004883, + "eval_runtime": 6.7133, + "eval_samples_per_second": 537.444, + "eval_steps_per_second": 8.491, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00013766119799167185, + "loss": 0.7455, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00013489416890945355, + "loss": 0.7393, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.8045174479484558, + "eval_runtime": 6.7979, + "eval_samples_per_second": 530.749, + "eval_steps_per_second": 8.385, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00013180852439888816, + "loss": 0.7307, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8253883123397827, + "eval_runtime": 6.8442, + "eval_samples_per_second": 527.163, + "eval_steps_per_second": 8.328, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0001284202948616156, + "loss": 0.7337, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00012474708267540326, + "loss": 0.7261, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.807209849357605, + "eval_runtime": 6.6224, + "eval_samples_per_second": 544.819, + "eval_steps_per_second": 8.607, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00012080797074706431, + "loss": 0.7229, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8088923096656799, + "eval_runtime": 6.8062, + "eval_samples_per_second": 530.104, + "eval_steps_per_second": 8.375, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00011662342337379818, + "loss": 0.7191, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00011221517992799368, + "loss": 0.7183, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8007187247276306, + "eval_runtime": 6.2665, + "eval_samples_per_second": 575.759, + "eval_steps_per_second": 9.096, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00010760614191781908, + "loss": 0.7166, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00010282025401033622, + "loss": 0.7132, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8111060857772827, + "eval_runtime": 6.6698, + "eval_samples_per_second": 540.943, + "eval_steps_per_second": 8.546, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 9.78823796352429e-05, + "loss": 0.71, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.8086754679679871, + "eval_runtime": 6.8025, + "eval_samples_per_second": 530.393, + "eval_steps_per_second": 8.379, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 9.281817181550047e-05, + "loss": 0.7141, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 8.765393989590249e-05, + "loss": 0.7054, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9118625277161863, + "eval_loss": 0.811958372592926, + "eval_runtime": 6.6182, + "eval_samples_per_second": 545.167, + "eval_steps_per_second": 8.613, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 8.241651286194919e-05, + "loss": 0.7086, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 7.713309995910833e-05, + "loss": 0.7018, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9101995565410199, + "eval_loss": 0.8114797472953796, + "eval_runtime": 6.7814, + "eval_samples_per_second": 532.042, + "eval_steps_per_second": 8.405, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 7.183114933656632e-05, + "loss": 0.7036, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8080512881278992, + "eval_runtime": 6.6797, + "eval_samples_per_second": 540.146, + "eval_steps_per_second": 8.533, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 6.653820544983747e-05, + "loss": 0.6973, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 6.12817659630473e-05, + "loss": 0.6997, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9254434589800443, + "eval_loss": 0.8000662922859192, + "eval_runtime": 6.4983, + "eval_samples_per_second": 555.221, + "eval_steps_per_second": 8.772, + "step": 765 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00014712965844918758, + "metric": "eval/loss", + "warmup_ratio": 0.10038289629174348 + } +} diff --git a/run-hzofpzk0/checkpoint-765/training_args.bin b/run-hzofpzk0/checkpoint-765/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8089d09d6bfb1731d89f9aa1be2d2e3d3d180c24 --- /dev/null +++ b/run-hzofpzk0/checkpoint-765/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e64931b89d3dace740454cf3cf3e5135fb82c98dfa675fe4591df106811ea892 +size 4792 diff --git a/run-i06psot1/checkpoint-1232/model.safetensors b/run-i06psot1/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c344ad9946ab16754950d3311e0c8e708678460 --- /dev/null +++ b/run-i06psot1/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3a2002d96a18ff7348d215ac06873dfdbd019ea8442e40615a81b3bacf697c8 +size 198025308 diff --git a/run-i06psot1/checkpoint-1232/optimizer.pt b/run-i06psot1/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e8b4ba17d60aaa4f9a683886a9986cdcd8bcafe --- /dev/null +++ b/run-i06psot1/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b928f0887aea364ebbda4f980c5d06d3b68ff7229ce1834ca9da249a9a4bbbe1 +size 395900602 diff --git a/run-i06psot1/checkpoint-1232/rng_state.pth b/run-i06psot1/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-i06psot1/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-i06psot1/checkpoint-1232/scheduler.pt b/run-i06psot1/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..404dcef3a426fab81c219d2612acfeb373f2999d --- /dev/null +++ b/run-i06psot1/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ad86f8f3d101b8677ec051a82c1bcced0ad0d6d906f02d5d43e738118591ec6 +size 1064 diff --git a/run-i06psot1/checkpoint-1232/trainer_state.json b/run-i06psot1/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6dd808fe3321f23cd96538e8f31ae7da3b85dd83 --- /dev/null +++ b/run-i06psot1/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9168514412416852, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-i06psot1/checkpoint-1147", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.541214768745637e-05, + "loss": 1.2224, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8899667405764967, + "eval_loss": 0.933688759803772, + "eval_runtime": 7.0131, + "eval_samples_per_second": 514.463, + "eval_steps_per_second": 8.128, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00013082429537491274, + "loss": 0.8664, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00019623644306236915, + "loss": 0.8105, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8927383592017738, + "eval_loss": 0.8463776111602783, + "eval_runtime": 6.4359, + "eval_samples_per_second": 560.608, + "eval_steps_per_second": 8.857, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0002616485907498255, + "loss": 0.8009, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8949556541019955, + "eval_loss": 0.8496062755584717, + "eval_runtime": 7.1343, + "eval_samples_per_second": 505.725, + "eval_steps_per_second": 7.99, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00032706073843728186, + "loss": 0.7988, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0003924728861247383, + "loss": 0.7933, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7253325942350333, + "eval_loss": 1.0598738193511963, + "eval_runtime": 7.0991, + "eval_samples_per_second": 508.237, + "eval_steps_per_second": 8.029, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0004578850338121947, + "loss": 0.7905, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.000523297181499651, + "loss": 0.7906, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8367516629711752, + "eval_loss": 0.9263951182365417, + "eval_runtime": 6.9769, + "eval_samples_per_second": 517.136, + "eval_steps_per_second": 8.17, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0005887093291871075, + "loss": 0.8017, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.850609756097561, + "eval_loss": 0.9293613433837891, + "eval_runtime": 6.5396, + "eval_samples_per_second": 551.716, + "eval_steps_per_second": 8.716, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0006541214768745637, + "loss": 0.8037, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0007195336245620201, + "loss": 0.8115, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8425720620842572, + "eval_loss": 0.9486232399940491, + "eval_runtime": 6.7346, + "eval_samples_per_second": 535.744, + "eval_steps_per_second": 8.464, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0007849457722494766, + "loss": 0.8312, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0008503579199369329, + "loss": 0.8231, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8342572062084257, + "eval_loss": 0.9842740297317505, + "eval_runtime": 6.6888, + "eval_samples_per_second": 539.406, + "eval_steps_per_second": 8.522, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0009157700676243894, + "loss": 0.8242, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8220620842572062, + "eval_loss": 0.9498592615127563, + "eval_runtime": 6.6054, + "eval_samples_per_second": 546.222, + "eval_steps_per_second": 8.629, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0009270774766819185, + "loss": 0.8286, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0009219902018209498, + "loss": 0.829, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8655764966740577, + "eval_loss": 0.896056592464447, + "eval_runtime": 6.9451, + "eval_samples_per_second": 519.503, + "eval_steps_per_second": 8.207, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0009130580947987899, + "loss": 0.829, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8786031042128604, + "eval_loss": 0.868511438369751, + "eval_runtime": 6.9859, + "eval_samples_per_second": 516.469, + "eval_steps_per_second": 8.159, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.000900356169347966, + "loss": 0.8265, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0008839910989371211, + "loss": 0.8148, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.850609756097561, + "eval_loss": 0.9289471507072449, + "eval_runtime": 6.5605, + "eval_samples_per_second": 549.958, + "eval_steps_per_second": 8.688, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0008641003209045607, + "loss": 0.8212, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0008408508822302846, + "loss": 0.8122, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8644678492239468, + "eval_loss": 0.9125615358352661, + "eval_runtime": 6.9192, + "eval_samples_per_second": 521.447, + "eval_steps_per_second": 8.238, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0008144380366399581, + "loss": 0.7993, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.8889752626419067, + "eval_runtime": 6.7584, + "eval_samples_per_second": 533.851, + "eval_steps_per_second": 8.434, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0007850836048226384, + "loss": 0.8168, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0007530341115334994, + "loss": 0.7939, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8580931263858093, + "eval_loss": 0.8910103440284729, + "eval_runtime": 6.8248, + "eval_samples_per_second": 528.662, + "eval_steps_per_second": 8.352, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0007185587152265607, + "loss": 0.7982, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0006819469476048025, + "loss": 0.7902, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.883869179600887, + "eval_loss": 0.8613596558570862, + "eval_runtime": 7.1252, + "eval_samples_per_second": 506.374, + "eval_steps_per_second": 8.0, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0006435062820714101, + "loss": 0.7889, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.897450110864745, + "eval_loss": 0.8360085487365723, + "eval_runtime": 6.9936, + "eval_samples_per_second": 515.899, + "eval_steps_per_second": 8.15, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0006035595515028071, + "loss": 0.7743, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0005624422370295669, + "loss": 0.7766, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.8980044345898004, + "eval_loss": 0.8370790481567383, + "eval_runtime": 6.7582, + "eval_samples_per_second": 533.87, + "eval_steps_per_second": 8.434, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.000520499650594602, + "loss": 0.7605, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.0004780840349501006, + "loss": 0.7672, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9007760532150776, + "eval_loss": 0.8291624784469604, + "eval_runtime": 6.6618, + "eval_samples_per_second": 541.599, + "eval_steps_per_second": 8.556, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0004355516054480542, + "loss": 0.7673, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8999445676274944, + "eval_loss": 0.8342077136039734, + "eval_runtime": 6.5845, + "eval_samples_per_second": 547.955, + "eval_steps_per_second": 8.657, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0003932595584680487, + "loss": 0.753, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.0003515630716061774, + "loss": 0.7456, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.8980044345898004, + "eval_loss": 0.8300418853759766, + "eval_runtime": 6.6947, + "eval_samples_per_second": 538.93, + "eval_steps_per_second": 8.514, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00031081232081812607, + "loss": 0.7405, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.833048403263092, + "eval_runtime": 6.6875, + "eval_samples_per_second": 539.517, + "eval_steps_per_second": 8.523, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0002713495395670971, + "loss": 0.739, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00023350614467447352, + "loss": 0.733, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9093680709534369, + "eval_loss": 0.8205416202545166, + "eval_runtime": 6.8777, + "eval_samples_per_second": 524.594, + "eval_steps_per_second": 8.288, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.00019759995301093847, + "loss": 0.7327, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.0001639325124028701, + "loss": 0.7206, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9068736141906873, + "eval_loss": 0.825107753276825, + "eval_runtime": 6.78, + "eval_samples_per_second": 532.156, + "eval_steps_per_second": 8.407, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 0.0001327865691696285, + "loss": 0.7172, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8140425682067871, + "eval_runtime": 7.1075, + "eval_samples_per_second": 507.635, + "eval_steps_per_second": 8.02, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 0.00010442369355989066, + "loss": 0.7189, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 7.908208302912936e-05, + "loss": 0.716, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.8235895037651062, + "eval_runtime": 6.6445, + "eval_samples_per_second": 543.002, + "eval_steps_per_second": 8.578, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 5.697456180677229e-05, + "loss": 0.7108, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.82867935531062e-05, + "loss": 0.7137, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8052936792373657, + "eval_runtime": 6.7672, + "eval_samples_per_second": 533.158, + "eval_steps_per_second": 8.423, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.317572211641146e-05, + "loss": 0.7074, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8162761330604553, + "eval_runtime": 6.9795, + "eval_samples_per_second": 516.941, + "eval_steps_per_second": 8.167, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.1768253485178214e-05, + "loss": 0.7033, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 4.160190004652845e-06, + "loss": 0.7033, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8134660720825195, + "eval_runtime": 6.7608, + "eval_samples_per_second": 533.661, + "eval_steps_per_second": 8.431, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.000928349326795054, + "metric": "eval/loss", + "warmup_ratio": 0.2923109364735419 + } +} diff --git a/run-i06psot1/checkpoint-1232/training_args.bin b/run-i06psot1/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7704a0d73c4bcb3f10ef43a083aa23dfc9f0139f --- /dev/null +++ b/run-i06psot1/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adf4b8ad7fabb9d91283b8e3cbdd7abd63ce10b4e22f2eacf80e028c0598c4da +size 4792 diff --git a/run-i06psot1/checkpoint-1260/model.safetensors b/run-i06psot1/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0c3162295c391465c52cb44ff0c0e203db25b63 --- /dev/null +++ b/run-i06psot1/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c665c12f6cd2f6a54e83f68bc92becbab5e4ce25d7381f9da8b27055afd1c486 +size 198025308 diff --git a/run-i06psot1/checkpoint-1260/optimizer.pt b/run-i06psot1/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea3e3a3dcb7d3690ee1b58ff2d5d64665acea158 --- /dev/null +++ b/run-i06psot1/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54d01d653f22011bd17d76222dcad3f5026bb67031992b20eab22e1066e96460 +size 395900602 diff --git a/run-i06psot1/checkpoint-1260/rng_state.pth b/run-i06psot1/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-i06psot1/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-i06psot1/checkpoint-1260/scheduler.pt b/run-i06psot1/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac96796c08aa802fc4a12183d6969b4e2d2d7c0f --- /dev/null +++ b/run-i06psot1/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03d1e6c549bd95751f61879cb1040ff6335c335feba58f036d71a3a0660cbc3b +size 1064 diff --git a/run-i06psot1/checkpoint-1260/trainer_state.json b/run-i06psot1/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1a73aab2129f604b3357955c413b8652eec69784 --- /dev/null +++ b/run-i06psot1/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9182372505543237, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-i06psot1/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.541214768745637e-05, + "loss": 1.2224, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8899667405764967, + "eval_loss": 0.933688759803772, + "eval_runtime": 7.0131, + "eval_samples_per_second": 514.463, + "eval_steps_per_second": 8.128, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00013082429537491274, + "loss": 0.8664, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00019623644306236915, + "loss": 0.8105, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8927383592017738, + "eval_loss": 0.8463776111602783, + "eval_runtime": 6.4359, + "eval_samples_per_second": 560.608, + "eval_steps_per_second": 8.857, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0002616485907498255, + "loss": 0.8009, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8949556541019955, + "eval_loss": 0.8496062755584717, + "eval_runtime": 7.1343, + "eval_samples_per_second": 505.725, + "eval_steps_per_second": 7.99, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00032706073843728186, + "loss": 0.7988, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0003924728861247383, + "loss": 0.7933, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7253325942350333, + "eval_loss": 1.0598738193511963, + "eval_runtime": 7.0991, + "eval_samples_per_second": 508.237, + "eval_steps_per_second": 8.029, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0004578850338121947, + "loss": 0.7905, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.000523297181499651, + "loss": 0.7906, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8367516629711752, + "eval_loss": 0.9263951182365417, + "eval_runtime": 6.9769, + "eval_samples_per_second": 517.136, + "eval_steps_per_second": 8.17, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0005887093291871075, + "loss": 0.8017, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.850609756097561, + "eval_loss": 0.9293613433837891, + "eval_runtime": 6.5396, + "eval_samples_per_second": 551.716, + "eval_steps_per_second": 8.716, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0006541214768745637, + "loss": 0.8037, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0007195336245620201, + "loss": 0.8115, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8425720620842572, + "eval_loss": 0.9486232399940491, + "eval_runtime": 6.7346, + "eval_samples_per_second": 535.744, + "eval_steps_per_second": 8.464, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0007849457722494766, + "loss": 0.8312, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0008503579199369329, + "loss": 0.8231, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8342572062084257, + "eval_loss": 0.9842740297317505, + "eval_runtime": 6.6888, + "eval_samples_per_second": 539.406, + "eval_steps_per_second": 8.522, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0009157700676243894, + "loss": 0.8242, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8220620842572062, + "eval_loss": 0.9498592615127563, + "eval_runtime": 6.6054, + "eval_samples_per_second": 546.222, + "eval_steps_per_second": 8.629, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0009270774766819185, + "loss": 0.8286, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0009219902018209498, + "loss": 0.829, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8655764966740577, + "eval_loss": 0.896056592464447, + "eval_runtime": 6.9451, + "eval_samples_per_second": 519.503, + "eval_steps_per_second": 8.207, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0009130580947987899, + "loss": 0.829, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8786031042128604, + "eval_loss": 0.868511438369751, + "eval_runtime": 6.9859, + "eval_samples_per_second": 516.469, + "eval_steps_per_second": 8.159, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.000900356169347966, + "loss": 0.8265, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0008839910989371211, + "loss": 0.8148, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.850609756097561, + "eval_loss": 0.9289471507072449, + "eval_runtime": 6.5605, + "eval_samples_per_second": 549.958, + "eval_steps_per_second": 8.688, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0008641003209045607, + "loss": 0.8212, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0008408508822302846, + "loss": 0.8122, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8644678492239468, + "eval_loss": 0.9125615358352661, + "eval_runtime": 6.9192, + "eval_samples_per_second": 521.447, + "eval_steps_per_second": 8.238, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0008144380366399581, + "loss": 0.7993, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.8889752626419067, + "eval_runtime": 6.7584, + "eval_samples_per_second": 533.851, + "eval_steps_per_second": 8.434, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0007850836048226384, + "loss": 0.8168, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0007530341115334994, + "loss": 0.7939, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8580931263858093, + "eval_loss": 0.8910103440284729, + "eval_runtime": 6.8248, + "eval_samples_per_second": 528.662, + "eval_steps_per_second": 8.352, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0007185587152265607, + "loss": 0.7982, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0006819469476048025, + "loss": 0.7902, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.883869179600887, + "eval_loss": 0.8613596558570862, + "eval_runtime": 7.1252, + "eval_samples_per_second": 506.374, + "eval_steps_per_second": 8.0, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0006435062820714101, + "loss": 0.7889, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.897450110864745, + "eval_loss": 0.8360085487365723, + "eval_runtime": 6.9936, + "eval_samples_per_second": 515.899, + "eval_steps_per_second": 8.15, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0006035595515028071, + "loss": 0.7743, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0005624422370295669, + "loss": 0.7766, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.8980044345898004, + "eval_loss": 0.8370790481567383, + "eval_runtime": 6.7582, + "eval_samples_per_second": 533.87, + "eval_steps_per_second": 8.434, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.000520499650594602, + "loss": 0.7605, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.0004780840349501006, + "loss": 0.7672, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9007760532150776, + "eval_loss": 0.8291624784469604, + "eval_runtime": 6.6618, + "eval_samples_per_second": 541.599, + "eval_steps_per_second": 8.556, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0004355516054480542, + "loss": 0.7673, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8999445676274944, + "eval_loss": 0.8342077136039734, + "eval_runtime": 6.5845, + "eval_samples_per_second": 547.955, + "eval_steps_per_second": 8.657, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0003932595584680487, + "loss": 0.753, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.0003515630716061774, + "loss": 0.7456, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.8980044345898004, + "eval_loss": 0.8300418853759766, + "eval_runtime": 6.6947, + "eval_samples_per_second": 538.93, + "eval_steps_per_second": 8.514, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00031081232081812607, + "loss": 0.7405, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.833048403263092, + "eval_runtime": 6.6875, + "eval_samples_per_second": 539.517, + "eval_steps_per_second": 8.523, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0002713495395670971, + "loss": 0.739, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00023350614467447352, + "loss": 0.733, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9093680709534369, + "eval_loss": 0.8205416202545166, + "eval_runtime": 6.8777, + "eval_samples_per_second": 524.594, + "eval_steps_per_second": 8.288, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.00019759995301093847, + "loss": 0.7327, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.0001639325124028701, + "loss": 0.7206, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9068736141906873, + "eval_loss": 0.825107753276825, + "eval_runtime": 6.78, + "eval_samples_per_second": 532.156, + "eval_steps_per_second": 8.407, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 0.0001327865691696285, + "loss": 0.7172, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8140425682067871, + "eval_runtime": 7.1075, + "eval_samples_per_second": 507.635, + "eval_steps_per_second": 8.02, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 0.00010442369355989066, + "loss": 0.7189, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 7.908208302912936e-05, + "loss": 0.716, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.8235895037651062, + "eval_runtime": 6.6445, + "eval_samples_per_second": 543.002, + "eval_steps_per_second": 8.578, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 5.697456180677229e-05, + "loss": 0.7108, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.82867935531062e-05, + "loss": 0.7137, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8052936792373657, + "eval_runtime": 6.7672, + "eval_samples_per_second": 533.158, + "eval_steps_per_second": 8.423, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.317572211641146e-05, + "loss": 0.7074, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8162761330604553, + "eval_runtime": 6.9795, + "eval_samples_per_second": 516.941, + "eval_steps_per_second": 8.167, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.1768253485178214e-05, + "loss": 0.7033, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 4.160190004652845e-06, + "loss": 0.7033, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8134660720825195, + "eval_runtime": 6.7608, + "eval_samples_per_second": 533.661, + "eval_steps_per_second": 8.431, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 4.1542580838746955e-07, + "loss": 0.709, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8086972832679749, + "eval_runtime": 6.8863, + "eval_samples_per_second": 523.94, + "eval_steps_per_second": 8.277, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.000928349326795054, + "metric": "eval/loss", + "warmup_ratio": 0.2923109364735419 + } +} diff --git a/run-i06psot1/checkpoint-1260/training_args.bin b/run-i06psot1/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7704a0d73c4bcb3f10ef43a083aa23dfc9f0139f --- /dev/null +++ b/run-i06psot1/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adf4b8ad7fabb9d91283b8e3cbdd7abd63ce10b4e22f2eacf80e028c0598c4da +size 4792 diff --git a/run-iy40zr57/checkpoint-1260/model.safetensors b/run-iy40zr57/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cdbb2d397cfa8df0f2fbd57ecfe7335b9e7b4c56 --- /dev/null +++ b/run-iy40zr57/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770f7cd0b1c281bef60fa8f7b958b4e63d99a3ab0bffdf12558becb6b3d2b432 +size 198025308 diff --git a/run-iy40zr57/checkpoint-1260/optimizer.pt b/run-iy40zr57/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d5ee58df284829ba29ee83d7f1b75067321ab68 --- /dev/null +++ b/run-iy40zr57/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7d1528a842f73e600fb1db1dd1a803171c13286d6806bf9f73fe35fe3a74e0f +size 395900602 diff --git a/run-iy40zr57/checkpoint-1260/rng_state.pth b/run-iy40zr57/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-iy40zr57/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-iy40zr57/checkpoint-1260/scheduler.pt b/run-iy40zr57/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..57cc94b514b0a6063735eb0bcc2f14eaf0ff4f12 --- /dev/null +++ b/run-iy40zr57/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eb463c83fb7cfcd3a4956dcd12d4a233e93cd9a19c15839d9b9d52a8b4a20b3 +size 1064 diff --git a/run-iy40zr57/checkpoint-1260/trainer_state.json b/run-iy40zr57/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0318b06d4af478530e5a365153421d4b8b92d25f --- /dev/null +++ b/run-iy40zr57/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9287694013303769, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-iy40zr57/checkpoint-340", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.8257817132058874e-06, + "loss": 1.4784, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8287139689578714, + "eval_loss": 1.090883731842041, + "eval_runtime": 6.7636, + "eval_samples_per_second": 533.444, + "eval_steps_per_second": 8.427, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 1.3651563426411775e-05, + "loss": 1.1536, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 2.0477345139617665e-05, + "loss": 0.9422, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.873059866962306, + "eval_loss": 0.8888150453567505, + "eval_runtime": 6.5024, + "eval_samples_per_second": 554.873, + "eval_steps_per_second": 8.766, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 2.730312685282355e-05, + "loss": 0.8757, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8299553394317627, + "eval_runtime": 6.6301, + "eval_samples_per_second": 544.189, + "eval_steps_per_second": 8.597, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 3.412890856602944e-05, + "loss": 0.8396, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 4.095469027923533e-05, + "loss": 0.8127, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8024356961250305, + "eval_runtime": 6.6955, + "eval_samples_per_second": 538.872, + "eval_steps_per_second": 8.513, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 4.778047199244122e-05, + "loss": 0.801, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 5.46062537056471e-05, + "loss": 0.7925, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9207317073170732, + "eval_loss": 0.7971590757369995, + "eval_runtime": 6.8032, + "eval_samples_per_second": 530.341, + "eval_steps_per_second": 8.378, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 6.1432035418853e-05, + "loss": 0.7888, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.7974677681922913, + "eval_runtime": 6.9987, + "eval_samples_per_second": 515.527, + "eval_steps_per_second": 8.144, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 6.825781713205888e-05, + "loss": 0.7764, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 7.508359884526477e-05, + "loss": 0.779, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8010255098342896, + "eval_runtime": 6.7141, + "eval_samples_per_second": 537.376, + "eval_steps_per_second": 8.49, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 8.190938055847066e-05, + "loss": 0.7702, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 8.873516227167655e-05, + "loss": 0.7533, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9287694013303769, + "eval_loss": 0.8030133843421936, + "eval_runtime": 6.7151, + "eval_samples_per_second": 537.294, + "eval_steps_per_second": 8.488, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 9.556094398488244e-05, + "loss": 0.7537, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.7946798205375671, + "eval_runtime": 6.6744, + "eval_samples_per_second": 540.577, + "eval_steps_per_second": 8.54, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00010238672569808832, + "loss": 0.7587, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0001092125074112942, + "loss": 0.7504, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8057782649993896, + "eval_runtime": 6.6799, + "eval_samples_per_second": 540.132, + "eval_steps_per_second": 8.533, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00011498145053914161, + "loss": 0.7416, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.883869179600887, + "eval_loss": 0.8608627319335938, + "eval_runtime": 6.6108, + "eval_samples_per_second": 545.776, + "eval_steps_per_second": 8.622, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0001146106697272715, + "loss": 0.7462, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00011367637109794929, + "loss": 0.7383, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.897450110864745, + "eval_loss": 0.8359660506248474, + "eval_runtime": 6.6498, + "eval_samples_per_second": 542.571, + "eval_steps_per_second": 8.572, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00011218777253284118, + "loss": 0.7343, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00011015956069099923, + "loss": 0.7279, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9104767184035477, + "eval_loss": 0.8137813210487366, + "eval_runtime": 6.8645, + "eval_samples_per_second": 525.602, + "eval_steps_per_second": 8.304, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00010761174610884655, + "loss": 0.7252, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8077019453048706, + "eval_runtime": 6.8185, + "eval_samples_per_second": 529.147, + "eval_steps_per_second": 8.36, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00010456946577426759, + "loss": 0.7277, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0001010627351226269, + "loss": 0.716, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8134342432022095, + "eval_runtime": 6.64, + "eval_samples_per_second": 543.377, + "eval_steps_per_second": 8.584, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 9.712615190154781e-05, + "loss": 0.7207, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 9.279855482615013e-05, + "loss": 0.7155, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9007760532150776, + "eval_loss": 0.8358498215675354, + "eval_runtime": 6.6968, + "eval_samples_per_second": 538.762, + "eval_steps_per_second": 8.511, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 8.812264039248785e-05, + "loss": 0.7185, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.8080554008483887, + "eval_runtime": 6.609, + "eval_samples_per_second": 545.925, + "eval_steps_per_second": 8.625, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 8.314454162974252e-05, + "loss": 0.7156, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 7.791337294724496e-05, + "loss": 0.7097, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.7955994606018066, + "eval_runtime": 6.9512, + "eval_samples_per_second": 519.046, + "eval_steps_per_second": 8.2, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 7.248074556690847e-05, + "loss": 0.71, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 6.690025832186512e-05, + "loss": 0.7058, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9207317073170732, + "eval_loss": 0.8053115010261536, + "eval_runtime": 6.9729, + "eval_samples_per_second": 517.434, + "eval_steps_per_second": 8.175, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 6.122696884513525e-05, + "loss": 0.6986, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9240576496674058, + "eval_loss": 0.7997655272483826, + "eval_runtime": 6.9075, + "eval_samples_per_second": 522.327, + "eval_steps_per_second": 8.252, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 5.5516850365634566e-05, + "loss": 0.7046, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 4.9826239470822567e-05, + "loss": 0.7015, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.8101170659065247, + "eval_runtime": 6.8842, + "eval_samples_per_second": 524.1, + "eval_steps_per_second": 8.28, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 4.421128028442004e-05, + "loss": 0.6974, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9282150776053215, + "eval_loss": 0.7932699918746948, + "eval_runtime": 6.8046, + "eval_samples_per_second": 530.229, + "eval_steps_per_second": 8.377, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 3.872737054299264e-05, + "loss": 0.6956, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 3.3428615036463286e-05, + "loss": 0.6973, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9229490022172949, + "eval_loss": 0.797511875629425, + "eval_runtime": 6.7865, + "eval_samples_per_second": 531.642, + "eval_steps_per_second": 8.399, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 2.8367291804962204e-05, + "loss": 0.6969, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 2.3593336358569614e-05, + "loss": 0.6938, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.8013892769813538, + "eval_runtime": 6.6998, + "eval_samples_per_second": 538.527, + "eval_steps_per_second": 8.508, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.9153849008688698e-05, + "loss": 0.6997, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9223946784922394, + "eval_loss": 0.793586254119873, + "eval_runtime": 6.9235, + "eval_samples_per_second": 521.124, + "eval_steps_per_second": 8.233, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.5092630171766075e-05, + "loss": 0.6912, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 1.1449748230098871e-05, + "loss": 0.692, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.7942016124725342, + "eval_runtime": 6.7391, + "eval_samples_per_second": 535.38, + "eval_steps_per_second": 8.458, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 8.261144213255593e-06, + "loss": 0.6945, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 5.55827720036278e-06, + "loss": 0.6897, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9273835920177383, + "eval_loss": 0.7927607297897339, + "eval_runtime": 6.6537, + "eval_samples_per_second": 542.254, + "eval_steps_per_second": 8.567, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 3.367813941753172e-06, + "loss": 0.6888, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9246119733924612, + "eval_loss": 0.7930077314376831, + "eval_runtime": 6.8348, + "eval_samples_per_second": 527.885, + "eval_steps_per_second": 8.34, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.7113657621985004e-06, + "loss": 0.6944, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 6.052753414653067e-07, + "loss": 0.6941, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9246119733924612, + "eval_loss": 0.7932811379432678, + "eval_runtime": 6.7314, + "eval_samples_per_second": 535.998, + "eval_steps_per_second": 8.468, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 6.045547583785435e-08, + "loss": 0.6898, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9218403547671841, + "eval_loss": 0.8025810718536377, + "eval_runtime": 6.619, + "eval_samples_per_second": 545.097, + "eval_steps_per_second": 8.612, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00011498816886092996, + "metric": "eval/loss", + "warmup_ratio": 0.3470097627965571 + } +} diff --git a/run-iy40zr57/checkpoint-1260/training_args.bin b/run-iy40zr57/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..189a291b9324ad4b5ff52885d99c900750700cf1 --- /dev/null +++ b/run-iy40zr57/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56cbb7cde107953cbe21201ab22e1ea0f6e4714f2a84393fa7697b9819a3c9c8 +size 4792 diff --git a/run-iy40zr57/checkpoint-340/model.safetensors b/run-iy40zr57/checkpoint-340/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e5e5d60cb5d107e180e37bdd9693974870d1740f --- /dev/null +++ b/run-iy40zr57/checkpoint-340/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0111fce38e1cc04140ece4a3019011977d2826e195e46350cb045dacb5d90737 +size 198025308 diff --git a/run-iy40zr57/checkpoint-340/optimizer.pt b/run-iy40zr57/checkpoint-340/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fca227d66144499bcdeacb9dc7a677a1eb58e65 --- /dev/null +++ b/run-iy40zr57/checkpoint-340/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be3ced738291bd9b7b785fd31e8d4b935798a34930c675c86a3b4a48ce5833a8 +size 395900602 diff --git a/run-iy40zr57/checkpoint-340/rng_state.pth b/run-iy40zr57/checkpoint-340/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5f9a46a823a1ec9e2632069a8b004b6092f163ec --- /dev/null +++ b/run-iy40zr57/checkpoint-340/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3620a5dda7d3e82cfb2a4256d68d9cb239991caa891ae2d30664aa89f654dc18 +size 14244 diff --git a/run-iy40zr57/checkpoint-340/scheduler.pt b/run-iy40zr57/checkpoint-340/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..60b5d36041387b3ecaa40f07b546cefac6fea7ad --- /dev/null +++ b/run-iy40zr57/checkpoint-340/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88a207e6ef69370c4c808994f92ee6ffaa941d55ebaad05243ceeb14f13277a6 +size 1064 diff --git a/run-iy40zr57/checkpoint-340/trainer_state.json b/run-iy40zr57/checkpoint-340/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2bce330b60de01c7844b2fd0b62ff8c4c6d68570 --- /dev/null +++ b/run-iy40zr57/checkpoint-340/trainer_state.json @@ -0,0 +1,177 @@ +{ + "best_metric": 0.9287694013303769, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-iy40zr57/checkpoint-340", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 340, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.8257817132058874e-06, + "loss": 1.4784, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8287139689578714, + "eval_loss": 1.090883731842041, + "eval_runtime": 6.7636, + "eval_samples_per_second": 533.444, + "eval_steps_per_second": 8.427, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 1.3651563426411775e-05, + "loss": 1.1536, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 2.0477345139617665e-05, + "loss": 0.9422, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.873059866962306, + "eval_loss": 0.8888150453567505, + "eval_runtime": 6.5024, + "eval_samples_per_second": 554.873, + "eval_steps_per_second": 8.766, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 2.730312685282355e-05, + "loss": 0.8757, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8299553394317627, + "eval_runtime": 6.6301, + "eval_samples_per_second": 544.189, + "eval_steps_per_second": 8.597, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 3.412890856602944e-05, + "loss": 0.8396, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 4.095469027923533e-05, + "loss": 0.8127, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8024356961250305, + "eval_runtime": 6.6955, + "eval_samples_per_second": 538.872, + "eval_steps_per_second": 8.513, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 4.778047199244122e-05, + "loss": 0.801, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 5.46062537056471e-05, + "loss": 0.7925, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9207317073170732, + "eval_loss": 0.7971590757369995, + "eval_runtime": 6.8032, + "eval_samples_per_second": 530.341, + "eval_steps_per_second": 8.378, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 6.1432035418853e-05, + "loss": 0.7888, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.7974677681922913, + "eval_runtime": 6.9987, + "eval_samples_per_second": 515.527, + "eval_steps_per_second": 8.144, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 6.825781713205888e-05, + "loss": 0.7764, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 7.508359884526477e-05, + "loss": 0.779, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8010255098342896, + "eval_runtime": 6.7141, + "eval_samples_per_second": 537.376, + "eval_steps_per_second": 8.49, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 8.190938055847066e-05, + "loss": 0.7702, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 8.873516227167655e-05, + "loss": 0.7533, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9287694013303769, + "eval_loss": 0.8030133843421936, + "eval_runtime": 6.7151, + "eval_samples_per_second": 537.294, + "eval_steps_per_second": 8.488, + "step": 340 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00011498816886092996, + "metric": "eval/loss", + "warmup_ratio": 0.3470097627965571 + } +} diff --git a/run-iy40zr57/checkpoint-340/training_args.bin b/run-iy40zr57/checkpoint-340/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..189a291b9324ad4b5ff52885d99c900750700cf1 --- /dev/null +++ b/run-iy40zr57/checkpoint-340/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56cbb7cde107953cbe21201ab22e1ea0f6e4714f2a84393fa7697b9819a3c9c8 +size 4792 diff --git a/run-j27h69kd/checkpoint-1147/model.safetensors b/run-j27h69kd/checkpoint-1147/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..654729f64bb231dc3e70f2a3f6db0fd4ff5b02ae --- /dev/null +++ b/run-j27h69kd/checkpoint-1147/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfc8ff69fbc6185d108d80d1438940eacbf90442b025299c4f783731eaace91e +size 198025308 diff --git a/run-j27h69kd/checkpoint-1147/optimizer.pt b/run-j27h69kd/checkpoint-1147/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d64f3843e0ac2270ef7f8f568313952d60e6420a --- /dev/null +++ b/run-j27h69kd/checkpoint-1147/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7334bf0cfaff9e471c4e166c5ff309e09bb2642a949833dd580b41d748e9a258 +size 395900602 diff --git a/run-j27h69kd/checkpoint-1147/rng_state.pth b/run-j27h69kd/checkpoint-1147/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..43b1a3175dffb3289ba56a1e7f78b36ca1615834 --- /dev/null +++ b/run-j27h69kd/checkpoint-1147/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf2d43d63184b1920f250efdd6f38efa027691f238090c0a0b0f43317419a2de +size 14244 diff --git a/run-j27h69kd/checkpoint-1147/scheduler.pt b/run-j27h69kd/checkpoint-1147/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..44efbe64eec649a3c38faf52989dec631986935d --- /dev/null +++ b/run-j27h69kd/checkpoint-1147/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb1384056cb7541bcff247f1819882a1b0ac388eb635add8c467988b052c11f7 +size 1064 diff --git a/run-j27h69kd/checkpoint-1147/trainer_state.json b/run-j27h69kd/checkpoint-1147/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e69b0db5cd4b8efe4b695a165526401d6aef2424 --- /dev/null +++ b/run-j27h69kd/checkpoint-1147/trainer_state.json @@ -0,0 +1,534 @@ +{ + "best_metric": 0.9251662971175166, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-j27h69kd/checkpoint-1147", + "epoch": 26.988235294117647, + "eval_steps": 500, + "global_step": 1147, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.903724666427913e-05, + "loss": 1.3861, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.83009977827051, + "eval_loss": 0.9347839951515198, + "eval_runtime": 6.9017, + "eval_samples_per_second": 522.768, + "eval_steps_per_second": 8.259, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 3.807449332855826e-05, + "loss": 0.9674, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 5.71117399928374e-05, + "loss": 0.8636, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.8251736760139465, + "eval_runtime": 6.9439, + "eval_samples_per_second": 519.594, + "eval_steps_per_second": 8.209, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 7.614898665711653e-05, + "loss": 0.8176, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9065964523281597, + "eval_loss": 0.8187695741653442, + "eval_runtime": 6.7389, + "eval_samples_per_second": 535.397, + "eval_steps_per_second": 8.458, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 9.518623332139567e-05, + "loss": 0.7985, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001142234799856748, + "loss": 0.7863, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8323052525520325, + "eval_runtime": 7.1912, + "eval_samples_per_second": 501.724, + "eval_steps_per_second": 7.926, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00013326072664995392, + "loss": 0.7788, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00015229797331423305, + "loss": 0.775, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8930155210643016, + "eval_loss": 0.8416151404380798, + "eval_runtime": 6.94, + "eval_samples_per_second": 519.881, + "eval_steps_per_second": 8.213, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00015281663016172636, + "loss": 0.773, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.816801130771637, + "eval_runtime": 6.9146, + "eval_samples_per_second": 521.796, + "eval_steps_per_second": 8.243, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00015214279333262807, + "loss": 0.7606, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001510123907554268, + "loss": 0.7522, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9082594235033259, + "eval_loss": 0.8192659020423889, + "eval_runtime": 6.93, + "eval_samples_per_second": 520.635, + "eval_steps_per_second": 8.225, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00014943224668973758, + "loss": 0.7525, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00014741190049220274, + "loss": 0.7417, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8038569688796997, + "eval_runtime": 6.8602, + "eval_samples_per_second": 525.932, + "eval_steps_per_second": 8.309, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0001449635490272335, + "loss": 0.7327, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9113082039911308, + "eval_loss": 0.8133779168128967, + "eval_runtime": 6.704, + "eval_samples_per_second": 538.188, + "eval_steps_per_second": 8.502, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00014210197303433158, + "loss": 0.7425, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00013884444789651323, + "loss": 0.734, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8051131367683411, + "eval_runtime": 6.876, + "eval_samples_per_second": 524.721, + "eval_steps_per_second": 8.29, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0001352106393485275, + "loss": 0.7256, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8074860572814941, + "eval_runtime": 6.9252, + "eval_samples_per_second": 520.993, + "eval_steps_per_second": 8.231, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00013122248475448057, + "loss": 0.7239, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0001269040606715951, + "loss": 0.7213, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8024598956108093, + "eval_runtime": 6.8526, + "eval_samples_per_second": 526.518, + "eval_steps_per_second": 8.318, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00012228143749962395, + "loss": 0.7201, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00011738252209340285, + "loss": 0.7201, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8057366609573364, + "eval_runtime": 7.0045, + "eval_samples_per_second": 515.099, + "eval_steps_per_second": 8.138, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00011223688928869234, + "loss": 0.716, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8111719489097595, + "eval_runtime": 6.9232, + "eval_samples_per_second": 521.149, + "eval_steps_per_second": 8.233, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00010687560335839048, + "loss": 0.7195, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00010133103047698816, + "loss": 0.709, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8968957871396895, + "eval_loss": 0.8365785479545593, + "eval_runtime": 6.8505, + "eval_samples_per_second": 526.676, + "eval_steps_per_second": 8.321, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 9.563664332542245e-05, + "loss": 0.7129, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 8.982681901593139e-05, + "loss": 0.7049, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9113082039911308, + "eval_loss": 0.8116514682769775, + "eval_runtime": 6.625, + "eval_samples_per_second": 544.604, + "eval_steps_per_second": 8.604, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 8.39366315568419e-05, + "loss": 0.7071, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8132147192955017, + "eval_runtime": 6.7056, + "eval_samples_per_second": 538.061, + "eval_steps_per_second": 8.5, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 7.800164011018407e-05, + "loss": 0.7002, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 7.205767432042414e-05, + "loss": 0.7025, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9215631929046563, + "eval_loss": 0.8015514016151428, + "eval_runtime": 6.7112, + "eval_samples_per_second": 537.612, + "eval_steps_per_second": 8.493, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 6.614061801029057e-05, + "loss": 0.699, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 6.0286192549524826e-05, + "loss": 0.6995, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9007760532150776, + "eval_loss": 0.8260408043861389, + "eval_runtime": 6.9056, + "eval_samples_per_second": 522.477, + "eval_steps_per_second": 8.254, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 5.452974120436396e-05, + "loss": 0.7039, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9232261640798226, + "eval_loss": 0.7979505658149719, + "eval_runtime": 6.775, + "eval_samples_per_second": 532.545, + "eval_steps_per_second": 8.413, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 4.890601576963998e-05, + "loss": 0.6953, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 4.3448966771602675e-05, + "loss": 0.6956, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.809563398361206, + "eval_runtime": 6.7968, + "eval_samples_per_second": 530.837, + "eval_steps_per_second": 8.386, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 3.8191538508014216e-05, + "loss": 0.6947, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8077630996704102, + "eval_runtime": 6.9657, + "eval_samples_per_second": 517.968, + "eval_steps_per_second": 8.183, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 3.316547016286238e-05, + "loss": 0.6954, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 2.8401104196366028e-05, + "loss": 0.6934, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8040671348571777, + "eval_runtime": 6.9644, + "eval_samples_per_second": 518.063, + "eval_steps_per_second": 8.184, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 2.392720316702609e-05, + "loss": 0.6936, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 1.9770776091569715e-05, + "loss": 0.6909, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.8043574094772339, + "eval_runtime": 6.7243, + "eval_samples_per_second": 536.558, + "eval_steps_per_second": 8.477, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.5956915391056278e-05, + "loss": 0.6889, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8034777641296387, + "eval_runtime": 6.9998, + "eval_samples_per_second": 515.447, + "eval_steps_per_second": 8.143, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.2508645407504062e-05, + "loss": 0.6888, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 9.446783405545129e-06, + "loss": 0.6874, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8066309690475464, + "eval_runtime": 6.6913, + "eval_samples_per_second": 539.208, + "eval_steps_per_second": 8.519, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 6.789813898244271e-06, + "loss": 0.689, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 4.55377705577859e-06, + "loss": 0.6913, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9251662971175166, + "eval_loss": 0.7976572513580322, + "eval_runtime": 7.0716, + "eval_samples_per_second": 510.211, + "eval_steps_per_second": 8.06, + "step": 1147 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00015303017510901303, + "metric": "eval/loss", + "warmup_ratio": 0.16566123614768755 + } +} diff --git a/run-j27h69kd/checkpoint-1147/training_args.bin b/run-j27h69kd/checkpoint-1147/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..23a70b7f0f800a8f029b8f2cdcb69c05332084a9 --- /dev/null +++ b/run-j27h69kd/checkpoint-1147/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa41e1f7f79c44677b93740d3764d5afe4d88f3beb1e9e915693cf8b3acf1f7e +size 4792 diff --git a/run-j27h69kd/checkpoint-1260/model.safetensors b/run-j27h69kd/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7fb110cfbb3dab0b483095c8ae6fe988db03f899 --- /dev/null +++ b/run-j27h69kd/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92bb45367a1b563df41e17295497abcdcb76573f5240b34dfa91cbd203b88df7 +size 198025308 diff --git a/run-j27h69kd/checkpoint-1260/optimizer.pt b/run-j27h69kd/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..744b052ba6746dc5ae072cfa8b7544cc69e1665f --- /dev/null +++ b/run-j27h69kd/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe800544b7d4e997dabd5e72821d81f9d2d1c5d172253357a8ecf52f4266233f +size 395900602 diff --git a/run-j27h69kd/checkpoint-1260/rng_state.pth b/run-j27h69kd/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-j27h69kd/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-j27h69kd/checkpoint-1260/scheduler.pt b/run-j27h69kd/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..76ceeb326f7254a0da04213ebeebfa5810f2dab9 --- /dev/null +++ b/run-j27h69kd/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94881985b027656678fdea3ffb413f8cb5912de6bb5252454ddfd077d3fedd41 +size 1064 diff --git a/run-j27h69kd/checkpoint-1260/trainer_state.json b/run-j27h69kd/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4aa6ed276cbb978532759c3891695ee8624f0d86 --- /dev/null +++ b/run-j27h69kd/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9251662971175166, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-j27h69kd/checkpoint-1147", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.903724666427913e-05, + "loss": 1.3861, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.83009977827051, + "eval_loss": 0.9347839951515198, + "eval_runtime": 6.9017, + "eval_samples_per_second": 522.768, + "eval_steps_per_second": 8.259, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 3.807449332855826e-05, + "loss": 0.9674, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 5.71117399928374e-05, + "loss": 0.8636, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.8251736760139465, + "eval_runtime": 6.9439, + "eval_samples_per_second": 519.594, + "eval_steps_per_second": 8.209, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 7.614898665711653e-05, + "loss": 0.8176, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9065964523281597, + "eval_loss": 0.8187695741653442, + "eval_runtime": 6.7389, + "eval_samples_per_second": 535.397, + "eval_steps_per_second": 8.458, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 9.518623332139567e-05, + "loss": 0.7985, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001142234799856748, + "loss": 0.7863, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8323052525520325, + "eval_runtime": 7.1912, + "eval_samples_per_second": 501.724, + "eval_steps_per_second": 7.926, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00013326072664995392, + "loss": 0.7788, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00015229797331423305, + "loss": 0.775, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8930155210643016, + "eval_loss": 0.8416151404380798, + "eval_runtime": 6.94, + "eval_samples_per_second": 519.881, + "eval_steps_per_second": 8.213, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00015281663016172636, + "loss": 0.773, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.816801130771637, + "eval_runtime": 6.9146, + "eval_samples_per_second": 521.796, + "eval_steps_per_second": 8.243, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00015214279333262807, + "loss": 0.7606, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001510123907554268, + "loss": 0.7522, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9082594235033259, + "eval_loss": 0.8192659020423889, + "eval_runtime": 6.93, + "eval_samples_per_second": 520.635, + "eval_steps_per_second": 8.225, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00014943224668973758, + "loss": 0.7525, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00014741190049220274, + "loss": 0.7417, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8038569688796997, + "eval_runtime": 6.8602, + "eval_samples_per_second": 525.932, + "eval_steps_per_second": 8.309, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0001449635490272335, + "loss": 0.7327, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9113082039911308, + "eval_loss": 0.8133779168128967, + "eval_runtime": 6.704, + "eval_samples_per_second": 538.188, + "eval_steps_per_second": 8.502, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00014210197303433158, + "loss": 0.7425, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00013884444789651323, + "loss": 0.734, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8051131367683411, + "eval_runtime": 6.876, + "eval_samples_per_second": 524.721, + "eval_steps_per_second": 8.29, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0001352106393485275, + "loss": 0.7256, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8074860572814941, + "eval_runtime": 6.9252, + "eval_samples_per_second": 520.993, + "eval_steps_per_second": 8.231, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00013122248475448057, + "loss": 0.7239, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0001269040606715951, + "loss": 0.7213, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8024598956108093, + "eval_runtime": 6.8526, + "eval_samples_per_second": 526.518, + "eval_steps_per_second": 8.318, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00012228143749962395, + "loss": 0.7201, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00011738252209340285, + "loss": 0.7201, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8057366609573364, + "eval_runtime": 7.0045, + "eval_samples_per_second": 515.099, + "eval_steps_per_second": 8.138, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00011223688928869234, + "loss": 0.716, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8111719489097595, + "eval_runtime": 6.9232, + "eval_samples_per_second": 521.149, + "eval_steps_per_second": 8.233, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00010687560335839048, + "loss": 0.7195, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00010133103047698816, + "loss": 0.709, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8968957871396895, + "eval_loss": 0.8365785479545593, + "eval_runtime": 6.8505, + "eval_samples_per_second": 526.676, + "eval_steps_per_second": 8.321, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 9.563664332542245e-05, + "loss": 0.7129, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 8.982681901593139e-05, + "loss": 0.7049, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9113082039911308, + "eval_loss": 0.8116514682769775, + "eval_runtime": 6.625, + "eval_samples_per_second": 544.604, + "eval_steps_per_second": 8.604, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 8.39366315568419e-05, + "loss": 0.7071, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8132147192955017, + "eval_runtime": 6.7056, + "eval_samples_per_second": 538.061, + "eval_steps_per_second": 8.5, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 7.800164011018407e-05, + "loss": 0.7002, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 7.205767432042414e-05, + "loss": 0.7025, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9215631929046563, + "eval_loss": 0.8015514016151428, + "eval_runtime": 6.7112, + "eval_samples_per_second": 537.612, + "eval_steps_per_second": 8.493, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 6.614061801029057e-05, + "loss": 0.699, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 6.0286192549524826e-05, + "loss": 0.6995, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9007760532150776, + "eval_loss": 0.8260408043861389, + "eval_runtime": 6.9056, + "eval_samples_per_second": 522.477, + "eval_steps_per_second": 8.254, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 5.452974120436396e-05, + "loss": 0.7039, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9232261640798226, + "eval_loss": 0.7979505658149719, + "eval_runtime": 6.775, + "eval_samples_per_second": 532.545, + "eval_steps_per_second": 8.413, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 4.890601576963998e-05, + "loss": 0.6953, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 4.3448966771602675e-05, + "loss": 0.6956, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.809563398361206, + "eval_runtime": 6.7968, + "eval_samples_per_second": 530.837, + "eval_steps_per_second": 8.386, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 3.8191538508014216e-05, + "loss": 0.6947, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8077630996704102, + "eval_runtime": 6.9657, + "eval_samples_per_second": 517.968, + "eval_steps_per_second": 8.183, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 3.316547016286238e-05, + "loss": 0.6954, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 2.8401104196366028e-05, + "loss": 0.6934, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8040671348571777, + "eval_runtime": 6.9644, + "eval_samples_per_second": 518.063, + "eval_steps_per_second": 8.184, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 2.392720316702609e-05, + "loss": 0.6936, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 1.9770776091569715e-05, + "loss": 0.6909, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.8043574094772339, + "eval_runtime": 6.7243, + "eval_samples_per_second": 536.558, + "eval_steps_per_second": 8.477, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.5956915391056278e-05, + "loss": 0.6889, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8034777641296387, + "eval_runtime": 6.9998, + "eval_samples_per_second": 515.447, + "eval_steps_per_second": 8.143, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.2508645407504062e-05, + "loss": 0.6888, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 9.446783405545129e-06, + "loss": 0.6874, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8066309690475464, + "eval_runtime": 6.6913, + "eval_samples_per_second": 539.208, + "eval_steps_per_second": 8.519, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 6.789813898244271e-06, + "loss": 0.689, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 4.55377705577859e-06, + "loss": 0.6913, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9251662971175166, + "eval_loss": 0.7976572513580322, + "eval_runtime": 7.0716, + "eval_samples_per_second": 510.211, + "eval_steps_per_second": 8.06, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.7521718706570184e-06, + "loss": 0.6898, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9235033259423503, + "eval_loss": 0.7983666658401489, + "eval_runtime": 6.6068, + "eval_samples_per_second": 546.105, + "eval_steps_per_second": 8.627, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.3958746640725548e-06, + "loss": 0.6862, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 4.930734253659013e-07, + "loss": 0.6861, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9223946784922394, + "eval_loss": 0.7985328435897827, + "eval_runtime": 7.0666, + "eval_samples_per_second": 510.568, + "eval_steps_per_second": 8.066, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 4.921838099437898e-08, + "loss": 0.6887, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9246119733924612, + "eval_loss": 0.7973311543464661, + "eval_runtime": 6.976, + "eval_samples_per_second": 517.199, + "eval_steps_per_second": 8.171, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00015303017510901303, + "metric": "eval/loss", + "warmup_ratio": 0.16566123614768755 + } +} diff --git a/run-j27h69kd/checkpoint-1260/training_args.bin b/run-j27h69kd/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..23a70b7f0f800a8f029b8f2cdcb69c05332084a9 --- /dev/null +++ b/run-j27h69kd/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa41e1f7f79c44677b93740d3764d5afe4d88f3beb1e9e915693cf8b3acf1f7e +size 4792 diff --git a/run-ksque4cz/checkpoint-1232/model.safetensors b/run-ksque4cz/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a5784b8411236ab6d44d8e05303cd3cbe0b223c --- /dev/null +++ b/run-ksque4cz/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39008645259e934b5fabb7ae0df15443012d4fd0d0aa38da1115d4e17cf36407 +size 198025308 diff --git a/run-ksque4cz/checkpoint-1232/optimizer.pt b/run-ksque4cz/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7cf16a9f060ae97cb215d26ea3451574e596413 --- /dev/null +++ b/run-ksque4cz/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68d381e462c7ff57fec29e23baeb99658783292530787e72f5626b1a44dd4235 +size 395900602 diff --git a/run-ksque4cz/checkpoint-1232/rng_state.pth b/run-ksque4cz/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-ksque4cz/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-ksque4cz/checkpoint-1232/scheduler.pt b/run-ksque4cz/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..674f3816c353463b073abc502ee023450dd6face --- /dev/null +++ b/run-ksque4cz/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82318ca5759871fccdef5276db5770bb81ecc983312b0da241eb5a3e365ff5a7 +size 1064 diff --git a/run-ksque4cz/checkpoint-1232/trainer_state.json b/run-ksque4cz/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..13ef41a42b397f8d6aafd1596fc6fc559ff7bbba --- /dev/null +++ b/run-ksque4cz/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9196230598669624, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-ksque4cz/checkpoint-1147", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.15927335363973e-05, + "loss": 1.2323, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8902439024390244, + "eval_loss": 0.9307599067687988, + "eval_runtime": 6.8719, + "eval_samples_per_second": 525.039, + "eval_steps_per_second": 8.295, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001231854670727946, + "loss": 0.8685, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001847782006091919, + "loss": 0.8112, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8802660753880266, + "eval_loss": 0.8644247055053711, + "eval_runtime": 7.0389, + "eval_samples_per_second": 512.579, + "eval_steps_per_second": 8.098, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0002463709341455892, + "loss": 0.7998, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9038248337028825, + "eval_loss": 0.8342661261558533, + "eval_runtime": 6.8875, + "eval_samples_per_second": 523.847, + "eval_steps_per_second": 8.276, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003079636676819865, + "loss": 0.797, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0003695564012183838, + "loss": 0.7934, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8766629711751663, + "eval_loss": 0.9014042019844055, + "eval_runtime": 6.9591, + "eval_samples_per_second": 518.456, + "eval_steps_per_second": 8.191, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0004311491347547811, + "loss": 0.7855, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0004927418682911784, + "loss": 0.7939, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8869179600886918, + "eval_loss": 0.8500996828079224, + "eval_runtime": 6.9403, + "eval_samples_per_second": 519.859, + "eval_steps_per_second": 8.213, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0005543346018275757, + "loss": 0.7924, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8694567627494457, + "eval_loss": 0.9023913145065308, + "eval_runtime": 7.1681, + "eval_samples_per_second": 503.339, + "eval_steps_per_second": 7.952, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0005631660138783322, + "loss": 0.7997, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0005607472767689483, + "loss": 0.7942, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8777716186252772, + "eval_loss": 0.8763846755027771, + "eval_runtime": 6.8365, + "eval_samples_per_second": 527.754, + "eval_steps_per_second": 8.338, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0005565483275047091, + "loss": 0.7976, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0005505959734480405, + "loss": 0.7883, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8963414634146342, + "eval_loss": 0.8391281366348267, + "eval_runtime": 6.9407, + "eval_samples_per_second": 519.834, + "eval_steps_per_second": 8.212, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0005429282162280265, + "loss": 0.7848, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8941241685144125, + "eval_loss": 0.8455160856246948, + "eval_runtime": 6.7277, + "eval_samples_per_second": 536.288, + "eval_steps_per_second": 8.472, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0005335940091265132, + "loss": 0.7804, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0005226529445455467, + "loss": 0.7767, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8855321507760532, + "eval_loss": 0.8717533349990845, + "eval_runtime": 6.5716, + "eval_samples_per_second": 549.027, + "eval_steps_per_second": 8.674, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0005101748735514504, + "loss": 0.7706, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8991130820399114, + "eval_loss": 0.8335922956466675, + "eval_runtime": 7.0637, + "eval_samples_per_second": 510.781, + "eval_steps_per_second": 8.069, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.000496239459924485, + "loss": 0.7681, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00048093567156117417, + "loss": 0.7563, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8841463414634146, + "eval_loss": 0.8561452627182007, + "eval_runtime": 6.681, + "eval_samples_per_second": 540.043, + "eval_steps_per_second": 8.532, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0004643612124763379, + "loss": 0.7596, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0004466218990311026, + "loss": 0.7567, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8960643015521065, + "eval_loss": 0.8392565846443176, + "eval_runtime": 6.6742, + "eval_samples_per_second": 540.588, + "eval_steps_per_second": 8.54, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00042783098436923904, + "loss": 0.7475, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8971729490022173, + "eval_loss": 0.8382574915885925, + "eval_runtime": 6.8559, + "eval_samples_per_second": 526.259, + "eval_steps_per_second": 8.314, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0004081084353748295, + "loss": 0.7531, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003875801667673857, + "loss": 0.7394, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8194323182106018, + "eval_runtime": 6.7401, + "eval_samples_per_second": 535.302, + "eval_steps_per_second": 8.457, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00036637723722418645, + "loss": 0.7433, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00034463501266203156, + "loss": 0.735, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8999445676274944, + "eval_loss": 0.8364071249961853, + "eval_runtime": 6.6264, + "eval_samples_per_second": 544.492, + "eval_steps_per_second": 8.602, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0003224923020202771, + "loss": 0.738, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8276115655899048, + "eval_runtime": 6.5373, + "eval_samples_per_second": 551.912, + "eval_steps_per_second": 8.719, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0003000904710625711, + "loss": 0.7262, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0002775725398550519, + "loss": 0.7229, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.8248811960220337, + "eval_runtime": 6.8257, + "eval_samples_per_second": 528.587, + "eval_steps_per_second": 8.351, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0002550822696829764, + "loss": 0.7179, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00023276324523518316, + "loss": 0.7181, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9088137472283814, + "eval_loss": 0.82561194896698, + "eval_runtime": 6.6189, + "eval_samples_per_second": 545.104, + "eval_steps_per_second": 8.612, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.000210757957915999, + "loss": 0.7239, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9104767184035477, + "eval_loss": 0.8202174305915833, + "eval_runtime": 6.8951, + "eval_samples_per_second": 523.269, + "eval_steps_per_second": 8.267, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00018920689613700605, + "loss": 0.7125, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.0001682476483965191, + "loss": 0.7091, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8158205151557922, + "eval_runtime": 6.7662, + "eval_samples_per_second": 533.242, + "eval_steps_per_second": 8.424, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00014801402487298792, + "loss": 0.7049, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8252764344215393, + "eval_runtime": 6.8803, + "eval_samples_per_second": 524.399, + "eval_steps_per_second": 8.285, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.00012863520314033514, + "loss": 0.7044, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.0001102349034592449, + "loss": 0.7046, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8157876133918762, + "eval_runtime": 6.801, + "eval_samples_per_second": 530.508, + "eval_steps_per_second": 8.381, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 9.293059890958782e-05, + "loss": 0.7037, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 7.683276540674069e-05, + "loss": 0.6994, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.8106905221939087, + "eval_runtime": 6.8928, + "eval_samples_per_second": 523.444, + "eval_steps_per_second": 8.269, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 6.20441763899192e-05, + "loss": 0.6948, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8103103637695312, + "eval_runtime": 6.8822, + "eval_samples_per_second": 524.253, + "eval_steps_per_second": 8.282, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 4.865924668545026e-05, + "loss": 0.697, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 3.676342973395662e-05, + "loss": 0.6949, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8120396137237549, + "eval_runtime": 7.2032, + "eval_samples_per_second": 500.887, + "eval_steps_per_second": 7.913, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 2.6432672029739722e-05, + "loss": 0.6959, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.773292825538323e-05, + "loss": 0.6974, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8067687153816223, + "eval_runtime": 6.8147, + "eval_samples_per_second": 529.444, + "eval_steps_per_second": 8.364, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.0719740207106621e-05, + "loss": 0.6952, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8088511824607849, + "eval_runtime": 6.6939, + "eval_samples_per_second": 538.999, + "eval_steps_per_second": 8.515, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 5.4378821991342475e-06, + "loss": 0.6881, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.921075210927971e-06, + "loss": 0.6904, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.8093944191932678, + "eval_runtime": 7.132, + "eval_samples_per_second": 505.888, + "eval_steps_per_second": 7.992, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0005638104069870214, + "metric": "eval/loss", + "warmup_ratio": 0.18870820717964376 + } +} diff --git a/run-ksque4cz/checkpoint-1232/training_args.bin b/run-ksque4cz/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4fc209c74541f699c57dbdf3fc8395b3808d45d7 --- /dev/null +++ b/run-ksque4cz/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c4ddfab71daf7b207c35d59d7de73e525833f8733be03b9522616d4770bb722 +size 4792 diff --git a/run-ksque4cz/checkpoint-1260/model.safetensors b/run-ksque4cz/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02d80271c5cce1d019077f2cadfed7a971d03db5 --- /dev/null +++ b/run-ksque4cz/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d0e9c2d6afe754fbcaacfa7b14d09739b4b2d47f08289b8b240a1e503031653 +size 198025308 diff --git a/run-ksque4cz/checkpoint-1260/optimizer.pt b/run-ksque4cz/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..72f5ca7bd1a344873d3302a209f2918f9a1899af --- /dev/null +++ b/run-ksque4cz/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9142d40b3a3671923e579d3ec1c3ce42838e2712290f8f6bae104d5643384b16 +size 395900602 diff --git a/run-ksque4cz/checkpoint-1260/rng_state.pth b/run-ksque4cz/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-ksque4cz/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-ksque4cz/checkpoint-1260/scheduler.pt b/run-ksque4cz/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d784f33bd5a88eccaa00a40b0404afb0aa72dae1 --- /dev/null +++ b/run-ksque4cz/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5fa134e037121418e2a190563c3a910c56dade94b8001134ff9c0e8ddca572e +size 1064 diff --git a/run-ksque4cz/checkpoint-1260/trainer_state.json b/run-ksque4cz/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d6b02292c47da401d06ffe62029b2cb68b651267 --- /dev/null +++ b/run-ksque4cz/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9235033259423503, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-ksque4cz/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.15927335363973e-05, + "loss": 1.2323, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8902439024390244, + "eval_loss": 0.9307599067687988, + "eval_runtime": 6.8719, + "eval_samples_per_second": 525.039, + "eval_steps_per_second": 8.295, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001231854670727946, + "loss": 0.8685, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001847782006091919, + "loss": 0.8112, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8802660753880266, + "eval_loss": 0.8644247055053711, + "eval_runtime": 7.0389, + "eval_samples_per_second": 512.579, + "eval_steps_per_second": 8.098, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0002463709341455892, + "loss": 0.7998, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9038248337028825, + "eval_loss": 0.8342661261558533, + "eval_runtime": 6.8875, + "eval_samples_per_second": 523.847, + "eval_steps_per_second": 8.276, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003079636676819865, + "loss": 0.797, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0003695564012183838, + "loss": 0.7934, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8766629711751663, + "eval_loss": 0.9014042019844055, + "eval_runtime": 6.9591, + "eval_samples_per_second": 518.456, + "eval_steps_per_second": 8.191, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0004311491347547811, + "loss": 0.7855, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0004927418682911784, + "loss": 0.7939, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8869179600886918, + "eval_loss": 0.8500996828079224, + "eval_runtime": 6.9403, + "eval_samples_per_second": 519.859, + "eval_steps_per_second": 8.213, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0005543346018275757, + "loss": 0.7924, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8694567627494457, + "eval_loss": 0.9023913145065308, + "eval_runtime": 7.1681, + "eval_samples_per_second": 503.339, + "eval_steps_per_second": 7.952, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0005631660138783322, + "loss": 0.7997, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0005607472767689483, + "loss": 0.7942, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8777716186252772, + "eval_loss": 0.8763846755027771, + "eval_runtime": 6.8365, + "eval_samples_per_second": 527.754, + "eval_steps_per_second": 8.338, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0005565483275047091, + "loss": 0.7976, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0005505959734480405, + "loss": 0.7883, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8963414634146342, + "eval_loss": 0.8391281366348267, + "eval_runtime": 6.9407, + "eval_samples_per_second": 519.834, + "eval_steps_per_second": 8.212, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0005429282162280265, + "loss": 0.7848, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8941241685144125, + "eval_loss": 0.8455160856246948, + "eval_runtime": 6.7277, + "eval_samples_per_second": 536.288, + "eval_steps_per_second": 8.472, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0005335940091265132, + "loss": 0.7804, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0005226529445455467, + "loss": 0.7767, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8855321507760532, + "eval_loss": 0.8717533349990845, + "eval_runtime": 6.5716, + "eval_samples_per_second": 549.027, + "eval_steps_per_second": 8.674, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0005101748735514504, + "loss": 0.7706, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8991130820399114, + "eval_loss": 0.8335922956466675, + "eval_runtime": 7.0637, + "eval_samples_per_second": 510.781, + "eval_steps_per_second": 8.069, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.000496239459924485, + "loss": 0.7681, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00048093567156117417, + "loss": 0.7563, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8841463414634146, + "eval_loss": 0.8561452627182007, + "eval_runtime": 6.681, + "eval_samples_per_second": 540.043, + "eval_steps_per_second": 8.532, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0004643612124763379, + "loss": 0.7596, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0004466218990311026, + "loss": 0.7567, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8960643015521065, + "eval_loss": 0.8392565846443176, + "eval_runtime": 6.6742, + "eval_samples_per_second": 540.588, + "eval_steps_per_second": 8.54, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00042783098436923904, + "loss": 0.7475, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8971729490022173, + "eval_loss": 0.8382574915885925, + "eval_runtime": 6.8559, + "eval_samples_per_second": 526.259, + "eval_steps_per_second": 8.314, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0004081084353748295, + "loss": 0.7531, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003875801667673857, + "loss": 0.7394, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8194323182106018, + "eval_runtime": 6.7401, + "eval_samples_per_second": 535.302, + "eval_steps_per_second": 8.457, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00036637723722418645, + "loss": 0.7433, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00034463501266203156, + "loss": 0.735, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8999445676274944, + "eval_loss": 0.8364071249961853, + "eval_runtime": 6.6264, + "eval_samples_per_second": 544.492, + "eval_steps_per_second": 8.602, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0003224923020202771, + "loss": 0.738, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8276115655899048, + "eval_runtime": 6.5373, + "eval_samples_per_second": 551.912, + "eval_steps_per_second": 8.719, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0003000904710625711, + "loss": 0.7262, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0002775725398550519, + "loss": 0.7229, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.8248811960220337, + "eval_runtime": 6.8257, + "eval_samples_per_second": 528.587, + "eval_steps_per_second": 8.351, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0002550822696829764, + "loss": 0.7179, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00023276324523518316, + "loss": 0.7181, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9088137472283814, + "eval_loss": 0.82561194896698, + "eval_runtime": 6.6189, + "eval_samples_per_second": 545.104, + "eval_steps_per_second": 8.612, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.000210757957915999, + "loss": 0.7239, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9104767184035477, + "eval_loss": 0.8202174305915833, + "eval_runtime": 6.8951, + "eval_samples_per_second": 523.269, + "eval_steps_per_second": 8.267, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00018920689613700605, + "loss": 0.7125, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.0001682476483965191, + "loss": 0.7091, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8158205151557922, + "eval_runtime": 6.7662, + "eval_samples_per_second": 533.242, + "eval_steps_per_second": 8.424, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00014801402487298792, + "loss": 0.7049, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8252764344215393, + "eval_runtime": 6.8803, + "eval_samples_per_second": 524.399, + "eval_steps_per_second": 8.285, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.00012863520314033514, + "loss": 0.7044, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.0001102349034592449, + "loss": 0.7046, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8157876133918762, + "eval_runtime": 6.801, + "eval_samples_per_second": 530.508, + "eval_steps_per_second": 8.381, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 9.293059890958782e-05, + "loss": 0.7037, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 7.683276540674069e-05, + "loss": 0.6994, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.8106905221939087, + "eval_runtime": 6.8928, + "eval_samples_per_second": 523.444, + "eval_steps_per_second": 8.269, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 6.20441763899192e-05, + "loss": 0.6948, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8103103637695312, + "eval_runtime": 6.8822, + "eval_samples_per_second": 524.253, + "eval_steps_per_second": 8.282, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 4.865924668545026e-05, + "loss": 0.697, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 3.676342973395662e-05, + "loss": 0.6949, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8120396137237549, + "eval_runtime": 7.2032, + "eval_samples_per_second": 500.887, + "eval_steps_per_second": 7.913, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 2.6432672029739722e-05, + "loss": 0.6959, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.773292825538323e-05, + "loss": 0.6974, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8067687153816223, + "eval_runtime": 6.8147, + "eval_samples_per_second": 529.444, + "eval_steps_per_second": 8.364, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.0719740207106621e-05, + "loss": 0.6952, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8088511824607849, + "eval_runtime": 6.6939, + "eval_samples_per_second": 538.999, + "eval_steps_per_second": 8.515, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 5.4378821991342475e-06, + "loss": 0.6881, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.921075210927971e-06, + "loss": 0.6904, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.8093944191932678, + "eval_runtime": 7.132, + "eval_samples_per_second": 505.888, + "eval_steps_per_second": 7.992, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 1.917716022523345e-07, + "loss": 0.6942, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9235033259423503, + "eval_loss": 0.801005482673645, + "eval_runtime": 6.4146, + "eval_samples_per_second": 562.469, + "eval_steps_per_second": 8.886, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0005638104069870214, + "metric": "eval/loss", + "warmup_ratio": 0.18870820717964376 + } +} diff --git a/run-ksque4cz/checkpoint-1260/training_args.bin b/run-ksque4cz/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4fc209c74541f699c57dbdf3fc8395b3808d45d7 --- /dev/null +++ b/run-ksque4cz/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c4ddfab71daf7b207c35d59d7de73e525833f8733be03b9522616d4770bb722 +size 4792 diff --git a/run-kt2ie26h/checkpoint-616/model.safetensors b/run-kt2ie26h/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b0cd852620ca545c2d60aca5b90e5232218cd4d --- /dev/null +++ b/run-kt2ie26h/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cf8591f4198683ce629771e4d592a6cd066b629de4ae95981a2316fcc30207c +size 198025308 diff --git a/run-kt2ie26h/checkpoint-616/optimizer.pt b/run-kt2ie26h/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..852d645492af2b308b24d2e12da96c69e3542a90 --- /dev/null +++ b/run-kt2ie26h/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2282fa0ed6089e11625ec68dde777e817c3c8a5bcd18f3d4868286d34fbed3c +size 395900602 diff --git a/run-kt2ie26h/checkpoint-616/rng_state.pth b/run-kt2ie26h/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-kt2ie26h/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-kt2ie26h/checkpoint-616/scheduler.pt b/run-kt2ie26h/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb79785a11a5e56922aa778db72a01844d18d900 --- /dev/null +++ b/run-kt2ie26h/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d650151446f4153a2f16cba7e66a8662b3e309929193d40ad5e37df915924718 +size 1064 diff --git a/run-kt2ie26h/checkpoint-616/trainer_state.json b/run-kt2ie26h/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3780eb4901ee24ec999518787661e02cab0b2e2c --- /dev/null +++ b/run-kt2ie26h/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9195148997386117, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-kt2ie26h/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.88582081150443e-06, + "loss": 1.5156, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.7904656319290465, + "eval_f1": 0.740656389304416, + "eval_loss": 1.3383630514144897, + "eval_precision": 0.7218771761866012, + "eval_recall": 0.7904656319290465, + "eval_runtime": 7.9325, + "eval_samples_per_second": 454.839, + "eval_steps_per_second": 3.656, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 9.77164162300886e-06, + "loss": 1.3811, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 1.4657462434513293e-05, + "loss": 1.1234, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9867268800735474, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.9672, + "eval_samples_per_second": 452.856, + "eval_steps_per_second": 3.64, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 1.954328324601772e-05, + "loss": 0.9585, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8625277161862528, + "eval_f1": 0.8219234371008811, + "eval_loss": 0.9117822647094727, + "eval_precision": 0.8595646098513291, + "eval_recall": 0.8625277161862528, + "eval_runtime": 8.246, + "eval_samples_per_second": 437.547, + "eval_steps_per_second": 3.517, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 2.4429104057522157e-05, + "loss": 0.9157, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 2.9314924869026586e-05, + "loss": 0.871, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8902439024390244, + "eval_f1": 0.8787977892571924, + "eval_loss": 0.8646798729896545, + "eval_precision": 0.8855552776107575, + "eval_recall": 0.8902439024390244, + "eval_runtime": 7.779, + "eval_samples_per_second": 463.815, + "eval_steps_per_second": 3.728, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 3.420074568053101e-05, + "loss": 0.8486, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 3.908656649203544e-05, + "loss": 0.8151, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9060421286031042, + "eval_f1": 0.9015529987858574, + "eval_loss": 0.828107476234436, + "eval_precision": 0.8985202159133769, + "eval_recall": 0.9060421286031042, + "eval_runtime": 8.0971, + "eval_samples_per_second": 445.592, + "eval_steps_per_second": 3.582, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 4.397238730353988e-05, + "loss": 0.7919, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9034386585040912, + "eval_loss": 0.8040855526924133, + "eval_precision": 0.9001854637488936, + "eval_recall": 0.9110310421286031, + "eval_runtime": 8.0741, + "eval_samples_per_second": 446.859, + "eval_steps_per_second": 3.592, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 4.734751926026701e-05, + "loss": 0.7917, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 4.722206739596685e-05, + "loss": 0.7839, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9058422753727602, + "eval_loss": 0.8005436062812805, + "eval_precision": 0.9049054924930415, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.6891, + "eval_samples_per_second": 469.237, + "eval_steps_per_second": 3.772, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 4.694209736247623e-05, + "loss": 0.7787, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 4.65094465409956e-05, + "loss": 0.7717, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9063606071996063, + "eval_loss": 0.8067835569381714, + "eval_precision": 0.9035344140496342, + "eval_recall": 0.9115853658536586, + "eval_runtime": 7.5142, + "eval_samples_per_second": 480.156, + "eval_steps_per_second": 3.859, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 4.592695432285491e-05, + "loss": 0.7689, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9023625774928576, + "eval_loss": 0.8100271224975586, + "eval_precision": 0.902033350890346, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.0912, + "eval_samples_per_second": 445.918, + "eval_steps_per_second": 3.584, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 4.519844347521849e-05, + "loss": 0.7627, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 4.432869505311238e-05, + "loss": 0.757, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9084706455005981, + "eval_loss": 0.8028958439826965, + "eval_precision": 0.9047481373076067, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.0465, + "eval_samples_per_second": 448.395, + "eval_steps_per_second": 3.604, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 4.332341702242123e-05, + "loss": 0.7533, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9138892884057384, + "eval_loss": 0.79969722032547, + "eval_precision": 0.910249726451116, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.0164, + "eval_samples_per_second": 450.077, + "eval_steps_per_second": 3.618, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 4.2189206799774915e-05, + "loss": 0.7516, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 4.093350795516715e-05, + "loss": 0.7443, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9132121814120838, + "eval_loss": 0.7963896989822388, + "eval_precision": 0.909302038502887, + "eval_recall": 0.9190687361419069, + "eval_runtime": 7.8361, + "eval_samples_per_second": 460.435, + "eval_steps_per_second": 3.701, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 3.956456136145757e-05, + "loss": 0.7508, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 3.809135111135186e-05, + "loss": 0.7392, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9093107715810045, + "eval_loss": 0.8137462735176086, + "eval_precision": 0.9108607481752476, + "eval_recall": 0.9099223946784922, + "eval_runtime": 7.9539, + "eval_samples_per_second": 453.613, + "eval_steps_per_second": 3.646, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 3.6523545556795014e-05, + "loss": 0.7481, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9112161539467777, + "eval_loss": 0.8010615706443787, + "eval_precision": 0.907459141052013, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.0358, + "eval_samples_per_second": 448.99, + "eval_steps_per_second": 3.609, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 3.48714338577231e-05, + "loss": 0.7327, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 3.314585845658991e-05, + "loss": 0.7364, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9154221976020053, + "eval_loss": 0.7934799194335938, + "eval_precision": 0.9129493285377775, + "eval_recall": 0.9210088691796009, + "eval_runtime": 8.0354, + "eval_samples_per_second": 449.011, + "eval_steps_per_second": 3.609, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 3.13581439218232e-05, + "loss": 0.7344, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 2.9520022627195197e-05, + "loss": 0.7349, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9128222783503925, + "eval_loss": 0.8026086688041687, + "eval_precision": 0.9114429279209935, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.1136, + "eval_samples_per_second": 444.684, + "eval_steps_per_second": 3.574, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 2.7643557754857145e-05, + "loss": 0.7301, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9114227221675604, + "eval_loss": 0.8016567230224609, + "eval_precision": 0.9100536972621655, + "eval_recall": 0.914079822616408, + "eval_runtime": 7.8802, + "eval_samples_per_second": 457.857, + "eval_steps_per_second": 3.68, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 2.5741064127351785e-05, + "loss": 0.7277, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 2.382502738816586e-05, + "loss": 0.7271, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9101995565410199, + "eval_f1": 0.9080588357855864, + "eval_loss": 0.807327389717102, + "eval_precision": 0.9069416317426184, + "eval_recall": 0.9101995565410199, + "eval_runtime": 7.7324, + "eval_samples_per_second": 466.607, + "eval_steps_per_second": 3.75, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 2.190802206122269e-05, + "loss": 0.7284, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 2.000262902707208e-05, + "loss": 0.7223, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.910166903311314, + "eval_loss": 0.7990384101867676, + "eval_precision": 0.9078942026995984, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.0547, + "eval_samples_per_second": 447.938, + "eval_steps_per_second": 3.6, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 1.8121352957363198e-05, + "loss": 0.7273, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9109255687635067, + "eval_loss": 0.80125892162323, + "eval_precision": 0.9076340533637645, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.9481, + "eval_samples_per_second": 453.943, + "eval_steps_per_second": 3.649, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 1.6276540249459438e-05, + "loss": 0.72, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 1.4480297999772443e-05, + "loss": 0.7143, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9073508306649442, + "eval_loss": 0.8037462830543518, + "eval_precision": 0.9033749482136944, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.0403, + "eval_samples_per_second": 448.739, + "eval_steps_per_second": 3.607, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 1.2744414547575058e-05, + "loss": 0.7227, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9119080924748105, + "eval_loss": 0.8022773265838623, + "eval_precision": 0.9089256462160646, + "eval_recall": 0.9174057649667405, + "eval_runtime": 7.98, + "eval_samples_per_second": 452.13, + "eval_steps_per_second": 3.634, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 1.108028211074666e-05, + "loss": 0.7191, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 9.49882202117536e-06, + "loss": 0.7169, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9092266617539181, + "eval_loss": 0.8043237328529358, + "eval_precision": 0.9057251551288309, + "eval_recall": 0.914079822616408, + "eval_runtime": 8.031, + "eval_samples_per_second": 449.258, + "eval_steps_per_second": 3.611, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 8.010413050480398e-06, + "loss": 0.7166, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 6.62482329643718e-06, + "loss": 0.7148, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9235033259423503, + "eval_f1": 0.9191500848683661, + "eval_loss": 0.7949368953704834, + "eval_precision": 0.9161724447973942, + "eval_recall": 0.9235033259423503, + "eval_runtime": 7.5984, + "eval_samples_per_second": 474.834, + "eval_steps_per_second": 3.817, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 5.351146077119304e-06, + "loss": 0.7158, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9141942483403345, + "eval_loss": 0.8005040884017944, + "eval_precision": 0.9112413802358937, + "eval_recall": 0.9207317073170732, + "eval_runtime": 7.5194, + "eval_samples_per_second": 479.828, + "eval_steps_per_second": 3.857, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 4.197740253469952e-06, + "loss": 0.7164, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 3.172175371952477e-06, + "loss": 0.7105, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9126126200538591, + "eval_loss": 0.7994533181190491, + "eval_precision": 0.9093278443889663, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.5846, + "eval_samples_per_second": 475.701, + "eval_steps_per_second": 3.824, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 2.2811819872967625e-06, + "loss": 0.7148, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 1.5306074913621072e-06, + "loss": 0.7112, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9115163814202354, + "eval_loss": 0.8017727136611938, + "eval_precision": 0.9079494118563345, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.6455, + "eval_samples_per_second": 471.912, + "eval_steps_per_second": 3.793, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 9.253777380021159e-07, + "loss": 0.7153, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9127162721135327, + "eval_loss": 0.7985677719116211, + "eval_precision": 0.9095597765167394, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.8314, + "eval_samples_per_second": 460.712, + "eval_steps_per_second": 3.703, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 4.6946471577894757e-07, + "loss": 0.7147, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.658604806838277e-07, + "loss": 0.7163, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9251662971175166, + "eval_f1": 0.9195148997386117, + "eval_loss": 0.7940902709960938, + "eval_precision": 0.9167199938977899, + "eval_recall": 0.9251662971175166, + "eval_runtime": 7.8749, + "eval_samples_per_second": 458.165, + "eval_steps_per_second": 3.683, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.13342346981599873, + "learning_rate": 4.7354878634581407e-05, + "metric": "eval/loss", + "weight_decay": 0.12408483813956132 + } +} diff --git a/run-kt2ie26h/checkpoint-616/training_args.bin b/run-kt2ie26h/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1e5ff7b527678e58f733d230012ccae705bb3fc0 --- /dev/null +++ b/run-kt2ie26h/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8765ab0fe7b952026003619f8425a670cdbe1185f3a728eb407a535d66fe74fc +size 4792 diff --git a/run-kt2ie26h/checkpoint-630/model.safetensors b/run-kt2ie26h/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe1745a2f5436455bb5f79e895ab287b5b4978d4 --- /dev/null +++ b/run-kt2ie26h/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:276f19edba7d53ddecbba35fae480845792e5cd39943411de867677129cb3363 +size 198025308 diff --git a/run-kt2ie26h/checkpoint-630/optimizer.pt b/run-kt2ie26h/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..649e0356be47e6a5d803830beed26fc42ea86223 --- /dev/null +++ b/run-kt2ie26h/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a6b98c96d95a7bffc6d11fa8e6ae98888a7d83a2d02142c645ce283bf777ae2 +size 395900602 diff --git a/run-kt2ie26h/checkpoint-630/rng_state.pth b/run-kt2ie26h/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-kt2ie26h/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-kt2ie26h/checkpoint-630/scheduler.pt b/run-kt2ie26h/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d39c37819530e363d765dae83d5df79abf0e6d08 --- /dev/null +++ b/run-kt2ie26h/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12604a75afcc61a24987faa0d9fe5dba3855146d4e9a227d4d552942f7875e8f +size 1064 diff --git a/run-kt2ie26h/checkpoint-630/trainer_state.json b/run-kt2ie26h/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c90fc12539cab872d3e6f6af63261ff3e745a0cd --- /dev/null +++ b/run-kt2ie26h/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9195148997386117, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-kt2ie26h/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.88582081150443e-06, + "loss": 1.5156, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.7904656319290465, + "eval_f1": 0.740656389304416, + "eval_loss": 1.3383630514144897, + "eval_precision": 0.7218771761866012, + "eval_recall": 0.7904656319290465, + "eval_runtime": 7.9325, + "eval_samples_per_second": 454.839, + "eval_steps_per_second": 3.656, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 9.77164162300886e-06, + "loss": 1.3811, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 1.4657462434513293e-05, + "loss": 1.1234, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9867268800735474, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.9672, + "eval_samples_per_second": 452.856, + "eval_steps_per_second": 3.64, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 1.954328324601772e-05, + "loss": 0.9585, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8625277161862528, + "eval_f1": 0.8219234371008811, + "eval_loss": 0.9117822647094727, + "eval_precision": 0.8595646098513291, + "eval_recall": 0.8625277161862528, + "eval_runtime": 8.246, + "eval_samples_per_second": 437.547, + "eval_steps_per_second": 3.517, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 2.4429104057522157e-05, + "loss": 0.9157, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 2.9314924869026586e-05, + "loss": 0.871, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8902439024390244, + "eval_f1": 0.8787977892571924, + "eval_loss": 0.8646798729896545, + "eval_precision": 0.8855552776107575, + "eval_recall": 0.8902439024390244, + "eval_runtime": 7.779, + "eval_samples_per_second": 463.815, + "eval_steps_per_second": 3.728, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 3.420074568053101e-05, + "loss": 0.8486, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 3.908656649203544e-05, + "loss": 0.8151, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9060421286031042, + "eval_f1": 0.9015529987858574, + "eval_loss": 0.828107476234436, + "eval_precision": 0.8985202159133769, + "eval_recall": 0.9060421286031042, + "eval_runtime": 8.0971, + "eval_samples_per_second": 445.592, + "eval_steps_per_second": 3.582, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 4.397238730353988e-05, + "loss": 0.7919, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9034386585040912, + "eval_loss": 0.8040855526924133, + "eval_precision": 0.9001854637488936, + "eval_recall": 0.9110310421286031, + "eval_runtime": 8.0741, + "eval_samples_per_second": 446.859, + "eval_steps_per_second": 3.592, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 4.734751926026701e-05, + "loss": 0.7917, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 4.722206739596685e-05, + "loss": 0.7839, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9058422753727602, + "eval_loss": 0.8005436062812805, + "eval_precision": 0.9049054924930415, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.6891, + "eval_samples_per_second": 469.237, + "eval_steps_per_second": 3.772, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 4.694209736247623e-05, + "loss": 0.7787, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 4.65094465409956e-05, + "loss": 0.7717, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9063606071996063, + "eval_loss": 0.8067835569381714, + "eval_precision": 0.9035344140496342, + "eval_recall": 0.9115853658536586, + "eval_runtime": 7.5142, + "eval_samples_per_second": 480.156, + "eval_steps_per_second": 3.859, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 4.592695432285491e-05, + "loss": 0.7689, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9023625774928576, + "eval_loss": 0.8100271224975586, + "eval_precision": 0.902033350890346, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.0912, + "eval_samples_per_second": 445.918, + "eval_steps_per_second": 3.584, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 4.519844347521849e-05, + "loss": 0.7627, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 4.432869505311238e-05, + "loss": 0.757, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9084706455005981, + "eval_loss": 0.8028958439826965, + "eval_precision": 0.9047481373076067, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.0465, + "eval_samples_per_second": 448.395, + "eval_steps_per_second": 3.604, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 4.332341702242123e-05, + "loss": 0.7533, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9138892884057384, + "eval_loss": 0.79969722032547, + "eval_precision": 0.910249726451116, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.0164, + "eval_samples_per_second": 450.077, + "eval_steps_per_second": 3.618, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 4.2189206799774915e-05, + "loss": 0.7516, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 4.093350795516715e-05, + "loss": 0.7443, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9132121814120838, + "eval_loss": 0.7963896989822388, + "eval_precision": 0.909302038502887, + "eval_recall": 0.9190687361419069, + "eval_runtime": 7.8361, + "eval_samples_per_second": 460.435, + "eval_steps_per_second": 3.701, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 3.956456136145757e-05, + "loss": 0.7508, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 3.809135111135186e-05, + "loss": 0.7392, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9093107715810045, + "eval_loss": 0.8137462735176086, + "eval_precision": 0.9108607481752476, + "eval_recall": 0.9099223946784922, + "eval_runtime": 7.9539, + "eval_samples_per_second": 453.613, + "eval_steps_per_second": 3.646, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 3.6523545556795014e-05, + "loss": 0.7481, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9112161539467777, + "eval_loss": 0.8010615706443787, + "eval_precision": 0.907459141052013, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.0358, + "eval_samples_per_second": 448.99, + "eval_steps_per_second": 3.609, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 3.48714338577231e-05, + "loss": 0.7327, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 3.314585845658991e-05, + "loss": 0.7364, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9154221976020053, + "eval_loss": 0.7934799194335938, + "eval_precision": 0.9129493285377775, + "eval_recall": 0.9210088691796009, + "eval_runtime": 8.0354, + "eval_samples_per_second": 449.011, + "eval_steps_per_second": 3.609, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 3.13581439218232e-05, + "loss": 0.7344, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 2.9520022627195197e-05, + "loss": 0.7349, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9128222783503925, + "eval_loss": 0.8026086688041687, + "eval_precision": 0.9114429279209935, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.1136, + "eval_samples_per_second": 444.684, + "eval_steps_per_second": 3.574, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 2.7643557754857145e-05, + "loss": 0.7301, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9114227221675604, + "eval_loss": 0.8016567230224609, + "eval_precision": 0.9100536972621655, + "eval_recall": 0.914079822616408, + "eval_runtime": 7.8802, + "eval_samples_per_second": 457.857, + "eval_steps_per_second": 3.68, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 2.5741064127351785e-05, + "loss": 0.7277, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 2.382502738816586e-05, + "loss": 0.7271, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9101995565410199, + "eval_f1": 0.9080588357855864, + "eval_loss": 0.807327389717102, + "eval_precision": 0.9069416317426184, + "eval_recall": 0.9101995565410199, + "eval_runtime": 7.7324, + "eval_samples_per_second": 466.607, + "eval_steps_per_second": 3.75, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 2.190802206122269e-05, + "loss": 0.7284, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 2.000262902707208e-05, + "loss": 0.7223, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.910166903311314, + "eval_loss": 0.7990384101867676, + "eval_precision": 0.9078942026995984, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.0547, + "eval_samples_per_second": 447.938, + "eval_steps_per_second": 3.6, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 1.8121352957363198e-05, + "loss": 0.7273, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9109255687635067, + "eval_loss": 0.80125892162323, + "eval_precision": 0.9076340533637645, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.9481, + "eval_samples_per_second": 453.943, + "eval_steps_per_second": 3.649, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 1.6276540249459438e-05, + "loss": 0.72, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 1.4480297999772443e-05, + "loss": 0.7143, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9073508306649442, + "eval_loss": 0.8037462830543518, + "eval_precision": 0.9033749482136944, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.0403, + "eval_samples_per_second": 448.739, + "eval_steps_per_second": 3.607, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 1.2744414547575058e-05, + "loss": 0.7227, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9119080924748105, + "eval_loss": 0.8022773265838623, + "eval_precision": 0.9089256462160646, + "eval_recall": 0.9174057649667405, + "eval_runtime": 7.98, + "eval_samples_per_second": 452.13, + "eval_steps_per_second": 3.634, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 1.108028211074666e-05, + "loss": 0.7191, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 9.49882202117536e-06, + "loss": 0.7169, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9092266617539181, + "eval_loss": 0.8043237328529358, + "eval_precision": 0.9057251551288309, + "eval_recall": 0.914079822616408, + "eval_runtime": 8.031, + "eval_samples_per_second": 449.258, + "eval_steps_per_second": 3.611, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 8.010413050480398e-06, + "loss": 0.7166, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 6.62482329643718e-06, + "loss": 0.7148, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9235033259423503, + "eval_f1": 0.9191500848683661, + "eval_loss": 0.7949368953704834, + "eval_precision": 0.9161724447973942, + "eval_recall": 0.9235033259423503, + "eval_runtime": 7.5984, + "eval_samples_per_second": 474.834, + "eval_steps_per_second": 3.817, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 5.351146077119304e-06, + "loss": 0.7158, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9141942483403345, + "eval_loss": 0.8005040884017944, + "eval_precision": 0.9112413802358937, + "eval_recall": 0.9207317073170732, + "eval_runtime": 7.5194, + "eval_samples_per_second": 479.828, + "eval_steps_per_second": 3.857, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 4.197740253469952e-06, + "loss": 0.7164, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 3.172175371952477e-06, + "loss": 0.7105, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9126126200538591, + "eval_loss": 0.7994533181190491, + "eval_precision": 0.9093278443889663, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.5846, + "eval_samples_per_second": 475.701, + "eval_steps_per_second": 3.824, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 2.2811819872967625e-06, + "loss": 0.7148, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 1.5306074913621072e-06, + "loss": 0.7112, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9115163814202354, + "eval_loss": 0.8017727136611938, + "eval_precision": 0.9079494118563345, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.6455, + "eval_samples_per_second": 471.912, + "eval_steps_per_second": 3.793, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 9.253777380021159e-07, + "loss": 0.7153, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9127162721135327, + "eval_loss": 0.7985677719116211, + "eval_precision": 0.9095597765167394, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.8314, + "eval_samples_per_second": 460.712, + "eval_steps_per_second": 3.703, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 4.6946471577894757e-07, + "loss": 0.7147, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.658604806838277e-07, + "loss": 0.7163, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9251662971175166, + "eval_f1": 0.9195148997386117, + "eval_loss": 0.7940902709960938, + "eval_precision": 0.9167199938977899, + "eval_recall": 0.9251662971175166, + "eval_runtime": 7.8749, + "eval_samples_per_second": 458.165, + "eval_steps_per_second": 3.683, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 1.6557519937563915e-08, + "loss": 0.715, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9148044194115135, + "eval_loss": 0.8035051226615906, + "eval_precision": 0.9161722340443917, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.274, + "eval_samples_per_second": 436.066, + "eval_steps_per_second": 3.505, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.13342346981599873, + "learning_rate": 4.7354878634581407e-05, + "metric": "eval/loss", + "weight_decay": 0.12408483813956132 + } +} diff --git a/run-kt2ie26h/checkpoint-630/training_args.bin b/run-kt2ie26h/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1e5ff7b527678e58f733d230012ccae705bb3fc0 --- /dev/null +++ b/run-kt2ie26h/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8765ab0fe7b952026003619f8425a670cdbe1185f3a728eb407a535d66fe74fc +size 4792 diff --git a/run-kup178tp/checkpoint-616/model.safetensors b/run-kup178tp/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e56a29f055cfb0b48777484f372cb1040c787786 --- /dev/null +++ b/run-kup178tp/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab8df025495a25529e210dff93ebdaa679087493840f7b38cf94658f771f2e80 +size 198025308 diff --git a/run-kup178tp/checkpoint-616/optimizer.pt b/run-kup178tp/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e9a4f4de5381690d94a1af8d39b2ce9b37e7ece --- /dev/null +++ b/run-kup178tp/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efbd7b7f04f72713473f90459fa9b454539bfdbe4632d049f6bde04674d8d60f +size 395900602 diff --git a/run-kup178tp/checkpoint-616/rng_state.pth b/run-kup178tp/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-kup178tp/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-kup178tp/checkpoint-616/scheduler.pt b/run-kup178tp/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9bb32f8c9c32813eaa59e0bd2f3872e597be1b15 --- /dev/null +++ b/run-kup178tp/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b0240886e9300640bab3e9dd1ecec5be4410aaff52e2635d98da18a5c041e12 +size 1064 diff --git a/run-kup178tp/checkpoint-616/trainer_state.json b/run-kup178tp/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..44f5cdc0c8f2bfdf7381c4a6ba31e6c056f89499 --- /dev/null +++ b/run-kup178tp/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9197055191767554, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-kup178tp/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.863462048102962e-05, + "loss": 1.4743, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.0257872343063354, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.2009, + "eval_samples_per_second": 439.953, + "eval_steps_per_second": 3.536, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.726924096205924e-05, + "loss": 1.113, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 5.5903861443088856e-05, + "loss": 0.9385, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8733370288248337, + "eval_f1": 0.8454181305319711, + "eval_loss": 0.9180569052696228, + "eval_precision": 0.8661355509810615, + "eval_recall": 0.8733370288248337, + "eval_runtime": 7.9112, + "eval_samples_per_second": 456.063, + "eval_steps_per_second": 3.666, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 7.453848192411847e-05, + "loss": 0.8662, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9041002030288464, + "eval_loss": 0.9000344276428223, + "eval_precision": 0.9005433421342476, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.286, + "eval_samples_per_second": 435.436, + "eval_steps_per_second": 3.5, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 9.317310240514809e-05, + "loss": 0.8243, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00011180772288617771, + "loss": 0.7985, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9072413858266618, + "eval_loss": 0.8054848909378052, + "eval_precision": 0.9034741113707262, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.8517, + "eval_samples_per_second": 459.519, + "eval_steps_per_second": 3.693, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00013044234336720732, + "loss": 0.7969, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001360810085612173, + "loss": 0.7729, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9089457911911749, + "eval_loss": 0.8009630441665649, + "eval_precision": 0.9063874493244409, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.2448, + "eval_samples_per_second": 437.612, + "eval_steps_per_second": 3.517, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00013560869242623842, + "loss": 0.7762, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9041019955654102, + "eval_f1": 0.9042165917612999, + "eval_loss": 0.8209497928619385, + "eval_precision": 0.90640140259345, + "eval_recall": 0.9041019955654102, + "eval_runtime": 7.6114, + "eval_samples_per_second": 474.028, + "eval_steps_per_second": 3.81, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00013474309378711354, + "loss": 0.7671, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00013348925443135306, + "loss": 0.762, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.912271133961234, + "eval_loss": 0.795735239982605, + "eval_precision": 0.9089939840567511, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.3206, + "eval_samples_per_second": 433.624, + "eval_steps_per_second": 3.485, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00013185447750306805, + "loss": 0.7561, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001298482849648942, + "loss": 0.7471, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9114285322030261, + "eval_loss": 0.8018564581871033, + "eval_precision": 0.9075578340445751, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.1074, + "eval_samples_per_second": 445.026, + "eval_steps_per_second": 3.577, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00012748236213612973, + "loss": 0.7411, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9113089779395261, + "eval_loss": 0.7982168197631836, + "eval_precision": 0.9097217546231211, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.9414, + "eval_samples_per_second": 454.326, + "eval_steps_per_second": 3.652, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0001247704896301322, + "loss": 0.7438, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00012172846308741253, + "loss": 0.7382, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.9015552597484715, + "eval_loss": 0.8132407069206238, + "eval_precision": 0.9061700168100267, + "eval_recall": 0.907150776053215, + "eval_runtime": 8.0617, + "eval_samples_per_second": 447.549, + "eval_steps_per_second": 3.597, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00011837400117195201, + "loss": 0.732, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.9083314656923788, + "eval_loss": 0.8111476898193359, + "eval_precision": 0.906541377474944, + "eval_recall": 0.9113082039911308, + "eval_runtime": 8.1701, + "eval_samples_per_second": 441.613, + "eval_steps_per_second": 3.55, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0001147266423666281, + "loss": 0.7367, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00011080763116887793, + "loss": 0.7265, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8841463414634146, + "eval_f1": 0.8918786430152501, + "eval_loss": 0.8648845553398132, + "eval_precision": 0.9068398385848906, + "eval_recall": 0.8841463414634146, + "eval_runtime": 7.9365, + "eval_samples_per_second": 454.609, + "eval_steps_per_second": 3.654, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0001066397943494665, + "loss": 0.7192, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00010224740799510595, + "loss": 0.7182, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9084299093032018, + "eval_loss": 0.8118075728416443, + "eval_precision": 0.9052513204596241, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.6865, + "eval_samples_per_second": 469.397, + "eval_steps_per_second": 3.773, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 9.765605610935296e-05, + "loss": 0.7177, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9035968632167786, + "eval_loss": 0.8231350779533386, + "eval_precision": 0.9066102552534451, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.8085, + "eval_samples_per_second": 462.059, + "eval_steps_per_second": 3.714, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 9.289248159538113e-05, + "loss": 0.7164, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 8.798443048859718e-05, + "loss": 0.7164, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9124409744395509, + "eval_loss": 0.804631769657135, + "eval_precision": 0.9095073227887353, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.6089, + "eval_samples_per_second": 474.184, + "eval_steps_per_second": 3.811, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 8.29604903463884e-05, + "loss": 0.7106, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 7.78499237363201e-05, + "loss": 0.7157, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.904379157427938, + "eval_f1": 0.904928082915014, + "eval_loss": 0.8187360763549805, + "eval_precision": 0.9097320987012659, + "eval_recall": 0.904379157427938, + "eval_runtime": 7.6371, + "eval_samples_per_second": 472.429, + "eval_steps_per_second": 3.797, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 7.268249779265152e-05, + "loss": 0.7047, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9099480407404161, + "eval_loss": 0.8040760159492493, + "eval_precision": 0.9083183111276815, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.9951, + "eval_samples_per_second": 451.277, + "eval_steps_per_second": 3.627, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 6.748831083394038e-05, + "loss": 0.7078, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 6.229761705162273e-05, + "loss": 0.7058, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9144859113889288, + "eval_loss": 0.8029214143753052, + "eval_precision": 0.9130107798200123, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.0198, + "eval_samples_per_second": 449.888, + "eval_steps_per_second": 3.616, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 5.714065029069195e-05, + "loss": 0.7033, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 5.204744794888803e-05, + "loss": 0.7035, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9105597099735178, + "eval_loss": 0.8097445964813232, + "eval_precision": 0.9074178401189466, + "eval_recall": 0.9157427937915743, + "eval_runtime": 7.8776, + "eval_samples_per_second": 458.005, + "eval_steps_per_second": 3.681, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 4.70476760201185e-05, + "loss": 0.7008, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8905210643015521, + "eval_f1": 0.8946914815221136, + "eval_loss": 0.8393078446388245, + "eval_precision": 0.9013341428034565, + "eval_recall": 0.8905210643015521, + "eval_runtime": 8.0555, + "eval_samples_per_second": 447.893, + "eval_steps_per_second": 3.6, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.2170456301167327e-05, + "loss": 0.7022, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.744419676814821e-05, + "loss": 0.702, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9116789392743475, + "eval_loss": 0.8070911765098572, + "eval_precision": 0.9116992515087252, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.1383, + "eval_samples_per_second": 443.334, + "eval_steps_per_second": 3.563, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.289642611069535e-05, + "loss": 0.6984, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9072633281047107, + "eval_loss": 0.8117515444755554, + "eval_precision": 0.9047512286279082, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.7793, + "eval_samples_per_second": 463.795, + "eval_steps_per_second": 3.728, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.8553633387668522e-05, + "loss": 0.6992, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.4441113738317462e-05, + "loss": 0.6969, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9182395502946792, + "eval_loss": 0.7993093729019165, + "eval_precision": 0.9174564034962863, + "eval_recall": 0.9223946784922394, + "eval_runtime": 8.1559, + "eval_samples_per_second": 442.381, + "eval_steps_per_second": 3.556, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.058282104758025e-05, + "loss": 0.6956, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.7001228423685378e-05, + "loss": 0.6949, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9100326651791877, + "eval_loss": 0.8072912096977234, + "eval_precision": 0.9079789644725773, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.6828, + "eval_samples_per_second": 469.623, + "eval_steps_per_second": 3.775, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.371719730072268e-05, + "loss": 0.6965, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9104157226145523, + "eval_loss": 0.8068956732749939, + "eval_precision": 0.9072952909569303, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.5214, + "eval_samples_per_second": 479.696, + "eval_steps_per_second": 3.856, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.0749855928611861e-05, + "loss": 0.6905, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 8.116487958219129e-06, + "loss": 0.6944, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9142172002222043, + "eval_loss": 0.8014960885047913, + "eval_precision": 0.9112200526343676, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.9166, + "eval_samples_per_second": 455.751, + "eval_steps_per_second": 3.663, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 5.832431770571437e-06, + "loss": 0.6929, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.910991136538223e-06, + "loss": 0.6929, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9103366730463809, + "eval_loss": 0.8089818358421326, + "eval_precision": 0.9076330086684216, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.949, + "eval_samples_per_second": 453.895, + "eval_steps_per_second": 3.648, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.3633577273545097e-06, + "loss": 0.6934, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9110473849154439, + "eval_loss": 0.8041980862617493, + "eval_precision": 0.9076891452984372, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.0725, + "eval_samples_per_second": 446.952, + "eval_steps_per_second": 3.592, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.198545927332323e-06, + "loss": 0.6934, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 4.2334032845207557e-07, + "loss": 0.6951, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9229490022172949, + "eval_f1": 0.9197055191767554, + "eval_loss": 0.7997114658355713, + "eval_precision": 0.9173905683682001, + "eval_recall": 0.9229490022172949, + "eval_runtime": 7.6898, + "eval_samples_per_second": 469.194, + "eval_steps_per_second": 3.771, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4842425483522051, + "learning_rate": 0.00013617607274598567, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-kup178tp/checkpoint-616/training_args.bin b/run-kup178tp/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..15137bdeec87c51bdbb5264dbe8cd6994ebc22c6 --- /dev/null +++ b/run-kup178tp/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:275745f1f028010f71352012db620446841504c96b7cb0d4042b3b41239504a1 +size 4792 diff --git a/run-kup178tp/checkpoint-630/model.safetensors b/run-kup178tp/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39091d6dda3a8e9cda7af3bfc8255e86578d53f7 --- /dev/null +++ b/run-kup178tp/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00da74876715df27bd48c4c8832ddc2ca7e7e662844bbc07409beefc18e86f53 +size 198025308 diff --git a/run-kup178tp/checkpoint-630/optimizer.pt b/run-kup178tp/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..70926f9e51b5e51cd98bf0fb341650226e4a7e83 --- /dev/null +++ b/run-kup178tp/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe0b60c96bc0296ae9354673e11bc778bc68854de6fccda12c0d0945a67a74ba +size 395900602 diff --git a/run-kup178tp/checkpoint-630/rng_state.pth b/run-kup178tp/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-kup178tp/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-kup178tp/checkpoint-630/scheduler.pt b/run-kup178tp/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d323f3452a41c02c9b6297654bd926380de246a0 --- /dev/null +++ b/run-kup178tp/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:605d5fd2070ca49079d4beafe363b7c67cc322be1e52508e85bc9f27573627a3 +size 1064 diff --git a/run-kup178tp/checkpoint-630/trainer_state.json b/run-kup178tp/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e8aa70bb326fe4bfdd0dcea6afe7657b533a3c56 --- /dev/null +++ b/run-kup178tp/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9197055191767554, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-kup178tp/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.863462048102962e-05, + "loss": 1.4743, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.0257872343063354, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.2009, + "eval_samples_per_second": 439.953, + "eval_steps_per_second": 3.536, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.726924096205924e-05, + "loss": 1.113, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 5.5903861443088856e-05, + "loss": 0.9385, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8733370288248337, + "eval_f1": 0.8454181305319711, + "eval_loss": 0.9180569052696228, + "eval_precision": 0.8661355509810615, + "eval_recall": 0.8733370288248337, + "eval_runtime": 7.9112, + "eval_samples_per_second": 456.063, + "eval_steps_per_second": 3.666, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 7.453848192411847e-05, + "loss": 0.8662, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9041002030288464, + "eval_loss": 0.9000344276428223, + "eval_precision": 0.9005433421342476, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.286, + "eval_samples_per_second": 435.436, + "eval_steps_per_second": 3.5, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 9.317310240514809e-05, + "loss": 0.8243, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00011180772288617771, + "loss": 0.7985, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9072413858266618, + "eval_loss": 0.8054848909378052, + "eval_precision": 0.9034741113707262, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.8517, + "eval_samples_per_second": 459.519, + "eval_steps_per_second": 3.693, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00013044234336720732, + "loss": 0.7969, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001360810085612173, + "loss": 0.7729, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9089457911911749, + "eval_loss": 0.8009630441665649, + "eval_precision": 0.9063874493244409, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.2448, + "eval_samples_per_second": 437.612, + "eval_steps_per_second": 3.517, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00013560869242623842, + "loss": 0.7762, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9041019955654102, + "eval_f1": 0.9042165917612999, + "eval_loss": 0.8209497928619385, + "eval_precision": 0.90640140259345, + "eval_recall": 0.9041019955654102, + "eval_runtime": 7.6114, + "eval_samples_per_second": 474.028, + "eval_steps_per_second": 3.81, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00013474309378711354, + "loss": 0.7671, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00013348925443135306, + "loss": 0.762, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.912271133961234, + "eval_loss": 0.795735239982605, + "eval_precision": 0.9089939840567511, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.3206, + "eval_samples_per_second": 433.624, + "eval_steps_per_second": 3.485, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00013185447750306805, + "loss": 0.7561, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001298482849648942, + "loss": 0.7471, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9114285322030261, + "eval_loss": 0.8018564581871033, + "eval_precision": 0.9075578340445751, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.1074, + "eval_samples_per_second": 445.026, + "eval_steps_per_second": 3.577, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00012748236213612973, + "loss": 0.7411, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9113089779395261, + "eval_loss": 0.7982168197631836, + "eval_precision": 0.9097217546231211, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.9414, + "eval_samples_per_second": 454.326, + "eval_steps_per_second": 3.652, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0001247704896301322, + "loss": 0.7438, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00012172846308741253, + "loss": 0.7382, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.9015552597484715, + "eval_loss": 0.8132407069206238, + "eval_precision": 0.9061700168100267, + "eval_recall": 0.907150776053215, + "eval_runtime": 8.0617, + "eval_samples_per_second": 447.549, + "eval_steps_per_second": 3.597, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00011837400117195201, + "loss": 0.732, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.9083314656923788, + "eval_loss": 0.8111476898193359, + "eval_precision": 0.906541377474944, + "eval_recall": 0.9113082039911308, + "eval_runtime": 8.1701, + "eval_samples_per_second": 441.613, + "eval_steps_per_second": 3.55, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0001147266423666281, + "loss": 0.7367, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00011080763116887793, + "loss": 0.7265, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8841463414634146, + "eval_f1": 0.8918786430152501, + "eval_loss": 0.8648845553398132, + "eval_precision": 0.9068398385848906, + "eval_recall": 0.8841463414634146, + "eval_runtime": 7.9365, + "eval_samples_per_second": 454.609, + "eval_steps_per_second": 3.654, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0001066397943494665, + "loss": 0.7192, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00010224740799510595, + "loss": 0.7182, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9084299093032018, + "eval_loss": 0.8118075728416443, + "eval_precision": 0.9052513204596241, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.6865, + "eval_samples_per_second": 469.397, + "eval_steps_per_second": 3.773, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 9.765605610935296e-05, + "loss": 0.7177, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9035968632167786, + "eval_loss": 0.8231350779533386, + "eval_precision": 0.9066102552534451, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.8085, + "eval_samples_per_second": 462.059, + "eval_steps_per_second": 3.714, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 9.289248159538113e-05, + "loss": 0.7164, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 8.798443048859718e-05, + "loss": 0.7164, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9124409744395509, + "eval_loss": 0.804631769657135, + "eval_precision": 0.9095073227887353, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.6089, + "eval_samples_per_second": 474.184, + "eval_steps_per_second": 3.811, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 8.29604903463884e-05, + "loss": 0.7106, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 7.78499237363201e-05, + "loss": 0.7157, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.904379157427938, + "eval_f1": 0.904928082915014, + "eval_loss": 0.8187360763549805, + "eval_precision": 0.9097320987012659, + "eval_recall": 0.904379157427938, + "eval_runtime": 7.6371, + "eval_samples_per_second": 472.429, + "eval_steps_per_second": 3.797, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 7.268249779265152e-05, + "loss": 0.7047, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9099480407404161, + "eval_loss": 0.8040760159492493, + "eval_precision": 0.9083183111276815, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.9951, + "eval_samples_per_second": 451.277, + "eval_steps_per_second": 3.627, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 6.748831083394038e-05, + "loss": 0.7078, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 6.229761705162273e-05, + "loss": 0.7058, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9144859113889288, + "eval_loss": 0.8029214143753052, + "eval_precision": 0.9130107798200123, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.0198, + "eval_samples_per_second": 449.888, + "eval_steps_per_second": 3.616, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 5.714065029069195e-05, + "loss": 0.7033, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 5.204744794888803e-05, + "loss": 0.7035, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9105597099735178, + "eval_loss": 0.8097445964813232, + "eval_precision": 0.9074178401189466, + "eval_recall": 0.9157427937915743, + "eval_runtime": 7.8776, + "eval_samples_per_second": 458.005, + "eval_steps_per_second": 3.681, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 4.70476760201185e-05, + "loss": 0.7008, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8905210643015521, + "eval_f1": 0.8946914815221136, + "eval_loss": 0.8393078446388245, + "eval_precision": 0.9013341428034565, + "eval_recall": 0.8905210643015521, + "eval_runtime": 8.0555, + "eval_samples_per_second": 447.893, + "eval_steps_per_second": 3.6, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.2170456301167327e-05, + "loss": 0.7022, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.744419676814821e-05, + "loss": 0.702, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9116789392743475, + "eval_loss": 0.8070911765098572, + "eval_precision": 0.9116992515087252, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.1383, + "eval_samples_per_second": 443.334, + "eval_steps_per_second": 3.563, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.289642611069535e-05, + "loss": 0.6984, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9072633281047107, + "eval_loss": 0.8117515444755554, + "eval_precision": 0.9047512286279082, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.7793, + "eval_samples_per_second": 463.795, + "eval_steps_per_second": 3.728, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.8553633387668522e-05, + "loss": 0.6992, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.4441113738317462e-05, + "loss": 0.6969, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9182395502946792, + "eval_loss": 0.7993093729019165, + "eval_precision": 0.9174564034962863, + "eval_recall": 0.9223946784922394, + "eval_runtime": 8.1559, + "eval_samples_per_second": 442.381, + "eval_steps_per_second": 3.556, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.058282104758025e-05, + "loss": 0.6956, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.7001228423685378e-05, + "loss": 0.6949, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9100326651791877, + "eval_loss": 0.8072912096977234, + "eval_precision": 0.9079789644725773, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.6828, + "eval_samples_per_second": 469.623, + "eval_steps_per_second": 3.775, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.371719730072268e-05, + "loss": 0.6965, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9104157226145523, + "eval_loss": 0.8068956732749939, + "eval_precision": 0.9072952909569303, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.5214, + "eval_samples_per_second": 479.696, + "eval_steps_per_second": 3.856, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.0749855928611861e-05, + "loss": 0.6905, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 8.116487958219129e-06, + "loss": 0.6944, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9142172002222043, + "eval_loss": 0.8014960885047913, + "eval_precision": 0.9112200526343676, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.9166, + "eval_samples_per_second": 455.751, + "eval_steps_per_second": 3.663, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 5.832431770571437e-06, + "loss": 0.6929, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.910991136538223e-06, + "loss": 0.6929, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9103366730463809, + "eval_loss": 0.8089818358421326, + "eval_precision": 0.9076330086684216, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.949, + "eval_samples_per_second": 453.895, + "eval_steps_per_second": 3.648, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.3633577273545097e-06, + "loss": 0.6934, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9110473849154439, + "eval_loss": 0.8041980862617493, + "eval_precision": 0.9076891452984372, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.0725, + "eval_samples_per_second": 446.952, + "eval_steps_per_second": 3.592, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.198545927332323e-06, + "loss": 0.6934, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 4.2334032845207557e-07, + "loss": 0.6951, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9229490022172949, + "eval_f1": 0.9197055191767554, + "eval_loss": 0.7997114658355713, + "eval_precision": 0.9173905683682001, + "eval_recall": 0.9229490022172949, + "eval_runtime": 7.6898, + "eval_samples_per_second": 469.194, + "eval_steps_per_second": 3.771, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 4.2256212658241294e-08, + "loss": 0.6896, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.91591214908413, + "eval_loss": 0.8049286603927612, + "eval_precision": 0.9144494098259612, + "eval_recall": 0.9187915742793792, + "eval_runtime": 8.0586, + "eval_samples_per_second": 447.722, + "eval_steps_per_second": 3.599, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4842425483522051, + "learning_rate": 0.00013617607274598567, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-kup178tp/checkpoint-630/training_args.bin b/run-kup178tp/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..15137bdeec87c51bdbb5264dbe8cd6994ebc22c6 --- /dev/null +++ b/run-kup178tp/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:275745f1f028010f71352012db620446841504c96b7cb0d4042b3b41239504a1 +size 4792 diff --git a/run-kyze5nt2/checkpoint-1190/model.safetensors b/run-kyze5nt2/checkpoint-1190/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f8b92d63dd0d33246f25505c6a4c68dba0621993 --- /dev/null +++ b/run-kyze5nt2/checkpoint-1190/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c674b93a9059fe269e474aa9d9ea84df42a1d179fda975da6453a2980033ffbb +size 198025308 diff --git a/run-kyze5nt2/checkpoint-1190/optimizer.pt b/run-kyze5nt2/checkpoint-1190/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..47454a0c75e84ba12bef9de00c5a0dcb881b9255 --- /dev/null +++ b/run-kyze5nt2/checkpoint-1190/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dcfae54a47282733e9a3394800142c33f65c842efe5f6fd1b21d044f75ae917 +size 395900602 diff --git a/run-kyze5nt2/checkpoint-1190/rng_state.pth b/run-kyze5nt2/checkpoint-1190/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fa10329c52a02715f873c9a50812feb7d32c8cd3 --- /dev/null +++ b/run-kyze5nt2/checkpoint-1190/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43f5febab37757cc5268c77056c937c9c526090d892464a785cf2004d48e5d85 +size 14244 diff --git a/run-kyze5nt2/checkpoint-1190/scheduler.pt b/run-kyze5nt2/checkpoint-1190/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b48810c7106bd38d1fec311f38f70abe5f84bce --- /dev/null +++ b/run-kyze5nt2/checkpoint-1190/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:024a94387f356ef70457b1ec9a3acfc8a311b9b20b965ce348f9e6ba74f0822e +size 1064 diff --git a/run-kyze5nt2/checkpoint-1190/trainer_state.json b/run-kyze5nt2/checkpoint-1190/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..99ffcc7053a592ae5e3d585e153d9e06f10bcce2 --- /dev/null +++ b/run-kyze5nt2/checkpoint-1190/trainer_state.json @@ -0,0 +1,549 @@ +{ + "best_metric": 0.9271064301552107, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-kyze5nt2/checkpoint-1190", + "epoch": 28.0, + "eval_steps": 500, + "global_step": 1190, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.6460024833071605e-05, + "loss": 1.4027, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8289911308203991, + "eval_loss": 0.9471043348312378, + "eval_runtime": 6.8217, + "eval_samples_per_second": 528.9, + "eval_steps_per_second": 8.356, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 3.292004966614321e-05, + "loss": 0.9838, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 4.9380074499214806e-05, + "loss": 0.8722, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.833198070526123, + "eval_runtime": 7.0053, + "eval_samples_per_second": 515.04, + "eval_steps_per_second": 8.137, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 6.584009933228642e-05, + "loss": 0.8214, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8076173067092896, + "eval_runtime": 6.7082, + "eval_samples_per_second": 537.852, + "eval_steps_per_second": 8.497, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 8.230012416535802e-05, + "loss": 0.8005, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 9.876014899842961e-05, + "loss": 0.789, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8359273672103882, + "eval_runtime": 6.8003, + "eval_samples_per_second": 530.563, + "eval_steps_per_second": 8.382, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00011014080233214967, + "loss": 0.7818, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010988935930278963, + "loss": 0.7742, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.799519956111908, + "eval_runtime": 7.1032, + "eval_samples_per_second": 507.937, + "eval_steps_per_second": 8.025, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00010932799285091163, + "loss": 0.7674, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.79893559217453, + "eval_runtime": 6.8246, + "eval_samples_per_second": 528.674, + "eval_steps_per_second": 8.352, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00010845987713423331, + "loss": 0.7571, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00010728992077562072, + "loss": 0.7503, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.7988522052764893, + "eval_runtime": 6.8193, + "eval_samples_per_second": 529.087, + "eval_steps_per_second": 8.359, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00010582473910805748, + "loss": 0.7504, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00010407261676929195, + "loss": 0.7414, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.7959096431732178, + "eval_runtime": 6.9085, + "eval_samples_per_second": 522.258, + "eval_steps_per_second": 8.251, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00010204346085766469, + "loss": 0.7322, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.8098660707473755, + "eval_runtime": 6.7755, + "eval_samples_per_second": 532.504, + "eval_steps_per_second": 8.413, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 9.974874491398974e-05, + "loss": 0.7408, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 9.720144404623459e-05, + "loss": 0.7306, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.8001447916030884, + "eval_runtime": 6.7715, + "eval_samples_per_second": 532.819, + "eval_steps_per_second": 8.418, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 9.441596156382669e-05, + "loss": 0.7259, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8067497611045837, + "eval_runtime": 6.8259, + "eval_samples_per_second": 528.572, + "eval_steps_per_second": 8.35, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 9.140804753642083e-05, + "loss": 0.722, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 8.81947097376241e-05, + "loss": 0.7209, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8020673990249634, + "eval_runtime": 6.7987, + "eval_samples_per_second": 530.693, + "eval_steps_per_second": 8.384, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 8.479411747723293e-05, + "loss": 0.7197, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 8.122549886574774e-05, + "loss": 0.7203, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.80466628074646, + "eval_runtime": 6.8772, + "eval_samples_per_second": 524.631, + "eval_steps_per_second": 8.288, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 7.750903209206659e-05, + "loss": 0.716, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.805901288986206, + "eval_runtime": 6.4865, + "eval_samples_per_second": 556.232, + "eval_steps_per_second": 8.787, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 7.366573132911102e-05, + "loss": 0.7177, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 6.971732791251253e-05, + "loss": 0.7076, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8146103024482727, + "eval_runtime": 6.9894, + "eval_samples_per_second": 516.21, + "eval_steps_per_second": 8.155, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 6.56861474642161e-05, + "loss": 0.7125, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 6.15949836557859e-05, + "loss": 0.7061, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8091168999671936, + "eval_runtime": 6.6146, + "eval_samples_per_second": 545.457, + "eval_steps_per_second": 8.617, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 5.7466969325198465e-05, + "loss": 0.707, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9221175166297118, + "eval_loss": 0.8022651672363281, + "eval_runtime": 7.0619, + "eval_samples_per_second": 510.908, + "eval_steps_per_second": 8.071, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 5.3325445675873006e-05, + "loss": 0.7012, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 4.919383029753269e-05, + "loss": 0.7017, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9240576496674058, + "eval_loss": 0.7995224595069885, + "eval_runtime": 6.9886, + "eval_samples_per_second": 516.271, + "eval_steps_per_second": 8.156, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 4.5095484755151074e-05, + "loss": 0.6985, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 4.105358249468222e-05, + "loss": 0.703, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.803298830986023, + "eval_runtime": 6.6421, + "eval_samples_per_second": 543.2, + "eval_steps_per_second": 8.582, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 3.709097781247952e-05, + "loss": 0.7059, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.7955936193466187, + "eval_runtime": 6.9747, + "eval_samples_per_second": 517.301, + "eval_steps_per_second": 8.172, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 3.32300766292958e-05, + "loss": 0.6973, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 2.949270979955193e-05, + "loss": 0.6978, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8064149022102356, + "eval_runtime": 6.8816, + "eval_samples_per_second": 524.295, + "eval_steps_per_second": 8.283, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 2.5900009672227228e-05, + "loss": 0.6955, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.8048540353775024, + "eval_runtime": 6.5326, + "eval_samples_per_second": 552.31, + "eval_steps_per_second": 8.726, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 2.2472290601338133e-05, + "loss": 0.6973, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 1.9228934081639798e-05, + "loss": 0.696, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9218403547671841, + "eval_loss": 0.8023791909217834, + "eval_runtime": 6.9268, + "eval_samples_per_second": 520.875, + "eval_steps_per_second": 8.229, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 1.618827915903305e-05, + "loss": 0.6957, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 1.336751873533276e-05, + "loss": 0.6946, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9207317073170732, + "eval_loss": 0.7988317608833313, + "eval_runtime": 6.9958, + "eval_samples_per_second": 515.735, + "eval_steps_per_second": 8.148, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.078260235372703e-05, + "loss": 0.69, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8035066723823547, + "eval_runtime": 6.7496, + "eval_samples_per_second": 534.546, + "eval_steps_per_second": 8.445, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 8.448146014610455e-06, + "loss": 0.6908, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 6.3773495317242506e-06, + "loss": 0.689, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8078456521034241, + "eval_runtime": 7.0573, + "eval_samples_per_second": 511.245, + "eval_steps_per_second": 8.077, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 4.581921895899396e-06, + "loss": 0.6917, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.0720150684219414e-06, + "loss": 0.6949, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9246119733924612, + "eval_loss": 0.7975802421569824, + "eval_runtime": 6.5701, + "eval_samples_per_second": 549.159, + "eval_steps_per_second": 8.676, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.8561665783753552e-06, + "loss": 0.6912, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9271064301552107, + "eval_loss": 0.7939683794975281, + "eval_runtime": 6.681, + "eval_samples_per_second": 540.04, + "eval_steps_per_second": 8.532, + "step": 1190 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00011015555080594073, + "metric": "eval/loss", + "warmup_ratio": 0.13779793029475776 + } +} diff --git a/run-kyze5nt2/checkpoint-1190/training_args.bin b/run-kyze5nt2/checkpoint-1190/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7056f7f8ef993e063c8e4fb7ab614189f4482fc4 --- /dev/null +++ b/run-kyze5nt2/checkpoint-1190/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc41f36e46e7115db4b77319f62c2d01ab9d78c8c27b31460b8a06012d6abbbf +size 4792 diff --git a/run-kyze5nt2/checkpoint-1260/model.safetensors b/run-kyze5nt2/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..399525c5688d7c7c7be706a498c69c7e31703936 --- /dev/null +++ b/run-kyze5nt2/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23ae59c4cc61ececa31ebf2ecb5ae58c4d44387be979ca5983edef61103b1298 +size 198025308 diff --git a/run-kyze5nt2/checkpoint-1260/optimizer.pt b/run-kyze5nt2/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..55722a19c15caa529485cf4ffcf8d0cb60dbce0e --- /dev/null +++ b/run-kyze5nt2/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2db6dc4793fc24fdb16ead8c5494761b81e69645ac1506d693316bffd8497401 +size 395900602 diff --git a/run-kyze5nt2/checkpoint-1260/rng_state.pth b/run-kyze5nt2/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-kyze5nt2/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-kyze5nt2/checkpoint-1260/scheduler.pt b/run-kyze5nt2/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6307ea39cf383b235994c064aea425c616bc981f --- /dev/null +++ b/run-kyze5nt2/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87aa967ec5fd01aebc81d6f2f7cc737c3e60b16f4c89343709fe515244307ab2 +size 1064 diff --git a/run-kyze5nt2/checkpoint-1260/trainer_state.json b/run-kyze5nt2/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..28242330234089c207b2c3bd3e7a655f5a7f5f3d --- /dev/null +++ b/run-kyze5nt2/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9271064301552107, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-kyze5nt2/checkpoint-1190", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.6460024833071605e-05, + "loss": 1.4027, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8289911308203991, + "eval_loss": 0.9471043348312378, + "eval_runtime": 6.8217, + "eval_samples_per_second": 528.9, + "eval_steps_per_second": 8.356, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 3.292004966614321e-05, + "loss": 0.9838, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 4.9380074499214806e-05, + "loss": 0.8722, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.833198070526123, + "eval_runtime": 7.0053, + "eval_samples_per_second": 515.04, + "eval_steps_per_second": 8.137, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 6.584009933228642e-05, + "loss": 0.8214, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8076173067092896, + "eval_runtime": 6.7082, + "eval_samples_per_second": 537.852, + "eval_steps_per_second": 8.497, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 8.230012416535802e-05, + "loss": 0.8005, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 9.876014899842961e-05, + "loss": 0.789, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8359273672103882, + "eval_runtime": 6.8003, + "eval_samples_per_second": 530.563, + "eval_steps_per_second": 8.382, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00011014080233214967, + "loss": 0.7818, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010988935930278963, + "loss": 0.7742, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.799519956111908, + "eval_runtime": 7.1032, + "eval_samples_per_second": 507.937, + "eval_steps_per_second": 8.025, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00010932799285091163, + "loss": 0.7674, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.79893559217453, + "eval_runtime": 6.8246, + "eval_samples_per_second": 528.674, + "eval_steps_per_second": 8.352, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00010845987713423331, + "loss": 0.7571, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00010728992077562072, + "loss": 0.7503, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.7988522052764893, + "eval_runtime": 6.8193, + "eval_samples_per_second": 529.087, + "eval_steps_per_second": 8.359, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00010582473910805748, + "loss": 0.7504, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00010407261676929195, + "loss": 0.7414, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.7959096431732178, + "eval_runtime": 6.9085, + "eval_samples_per_second": 522.258, + "eval_steps_per_second": 8.251, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00010204346085766469, + "loss": 0.7322, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.8098660707473755, + "eval_runtime": 6.7755, + "eval_samples_per_second": 532.504, + "eval_steps_per_second": 8.413, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 9.974874491398974e-05, + "loss": 0.7408, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 9.720144404623459e-05, + "loss": 0.7306, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.8001447916030884, + "eval_runtime": 6.7715, + "eval_samples_per_second": 532.819, + "eval_steps_per_second": 8.418, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 9.441596156382669e-05, + "loss": 0.7259, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8067497611045837, + "eval_runtime": 6.8259, + "eval_samples_per_second": 528.572, + "eval_steps_per_second": 8.35, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 9.140804753642083e-05, + "loss": 0.722, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 8.81947097376241e-05, + "loss": 0.7209, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8020673990249634, + "eval_runtime": 6.7987, + "eval_samples_per_second": 530.693, + "eval_steps_per_second": 8.384, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 8.479411747723293e-05, + "loss": 0.7197, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 8.122549886574774e-05, + "loss": 0.7203, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.80466628074646, + "eval_runtime": 6.8772, + "eval_samples_per_second": 524.631, + "eval_steps_per_second": 8.288, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 7.750903209206659e-05, + "loss": 0.716, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.805901288986206, + "eval_runtime": 6.4865, + "eval_samples_per_second": 556.232, + "eval_steps_per_second": 8.787, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 7.366573132911102e-05, + "loss": 0.7177, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 6.971732791251253e-05, + "loss": 0.7076, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8146103024482727, + "eval_runtime": 6.9894, + "eval_samples_per_second": 516.21, + "eval_steps_per_second": 8.155, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 6.56861474642161e-05, + "loss": 0.7125, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 6.15949836557859e-05, + "loss": 0.7061, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8091168999671936, + "eval_runtime": 6.6146, + "eval_samples_per_second": 545.457, + "eval_steps_per_second": 8.617, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 5.7466969325198465e-05, + "loss": 0.707, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9221175166297118, + "eval_loss": 0.8022651672363281, + "eval_runtime": 7.0619, + "eval_samples_per_second": 510.908, + "eval_steps_per_second": 8.071, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 5.3325445675873006e-05, + "loss": 0.7012, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 4.919383029753269e-05, + "loss": 0.7017, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9240576496674058, + "eval_loss": 0.7995224595069885, + "eval_runtime": 6.9886, + "eval_samples_per_second": 516.271, + "eval_steps_per_second": 8.156, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 4.5095484755151074e-05, + "loss": 0.6985, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 4.105358249468222e-05, + "loss": 0.703, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.803298830986023, + "eval_runtime": 6.6421, + "eval_samples_per_second": 543.2, + "eval_steps_per_second": 8.582, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 3.709097781247952e-05, + "loss": 0.7059, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.7955936193466187, + "eval_runtime": 6.9747, + "eval_samples_per_second": 517.301, + "eval_steps_per_second": 8.172, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 3.32300766292958e-05, + "loss": 0.6973, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 2.949270979955193e-05, + "loss": 0.6978, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8064149022102356, + "eval_runtime": 6.8816, + "eval_samples_per_second": 524.295, + "eval_steps_per_second": 8.283, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 2.5900009672227228e-05, + "loss": 0.6955, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.8048540353775024, + "eval_runtime": 6.5326, + "eval_samples_per_second": 552.31, + "eval_steps_per_second": 8.726, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 2.2472290601338133e-05, + "loss": 0.6973, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 1.9228934081639798e-05, + "loss": 0.696, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9218403547671841, + "eval_loss": 0.8023791909217834, + "eval_runtime": 6.9268, + "eval_samples_per_second": 520.875, + "eval_steps_per_second": 8.229, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 1.618827915903305e-05, + "loss": 0.6957, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 1.336751873533276e-05, + "loss": 0.6946, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9207317073170732, + "eval_loss": 0.7988317608833313, + "eval_runtime": 6.9958, + "eval_samples_per_second": 515.735, + "eval_steps_per_second": 8.148, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.078260235372703e-05, + "loss": 0.69, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8035066723823547, + "eval_runtime": 6.7496, + "eval_samples_per_second": 534.546, + "eval_steps_per_second": 8.445, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 8.448146014610455e-06, + "loss": 0.6908, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 6.3773495317242506e-06, + "loss": 0.689, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8078456521034241, + "eval_runtime": 7.0573, + "eval_samples_per_second": 511.245, + "eval_steps_per_second": 8.077, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 4.581921895899396e-06, + "loss": 0.6917, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.0720150684219414e-06, + "loss": 0.6949, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9246119733924612, + "eval_loss": 0.7975802421569824, + "eval_runtime": 6.5701, + "eval_samples_per_second": 549.159, + "eval_steps_per_second": 8.676, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.8561665783753552e-06, + "loss": 0.6912, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9271064301552107, + "eval_loss": 0.7939683794975281, + "eval_runtime": 6.681, + "eval_samples_per_second": 540.04, + "eval_steps_per_second": 8.532, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 9.412512485340762e-07, + "loss": 0.6896, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 3.3244232276672404e-07, + "loss": 0.6882, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8002775311470032, + "eval_runtime": 6.6672, + "eval_samples_per_second": 541.156, + "eval_steps_per_second": 8.549, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 3.318221474772081e-08, + "loss": 0.6909, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9251662971175166, + "eval_loss": 0.7961582541465759, + "eval_runtime": 6.9128, + "eval_samples_per_second": 521.934, + "eval_steps_per_second": 8.246, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00011015555080594073, + "metric": "eval/loss", + "warmup_ratio": 0.13779793029475776 + } +} diff --git a/run-kyze5nt2/checkpoint-1260/training_args.bin b/run-kyze5nt2/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7056f7f8ef993e063c8e4fb7ab614189f4482fc4 --- /dev/null +++ b/run-kyze5nt2/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc41f36e46e7115db4b77319f62c2d01ab9d78c8c27b31460b8a06012d6abbbf +size 4792 diff --git a/run-lx0qvogl/checkpoint-616/model.safetensors b/run-lx0qvogl/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e09428aba37b16f0b433adb4a8980bc2de821990 --- /dev/null +++ b/run-lx0qvogl/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a106c8d97761a84c19b19a096664e87465f7490b7f9b185b6d90f30c21c9a9a9 +size 198025308 diff --git a/run-lx0qvogl/checkpoint-616/optimizer.pt b/run-lx0qvogl/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..606d573453af89ca70342c4a3f85214256ab8bb3 --- /dev/null +++ b/run-lx0qvogl/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51867c9d44766789277b85bb22eb15416d4cdd0ed065885970607da48176c194 +size 395900602 diff --git a/run-lx0qvogl/checkpoint-616/rng_state.pth b/run-lx0qvogl/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-lx0qvogl/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-lx0qvogl/checkpoint-616/scheduler.pt b/run-lx0qvogl/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..16f550f5c7442cc02c459da64a98c0183c2e7acc --- /dev/null +++ b/run-lx0qvogl/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ebe91f3d06a63069d79f1e4d32035bd75d2a77b14cc781d4f84ecfb4c47d9dd +size 1064 diff --git a/run-lx0qvogl/checkpoint-616/trainer_state.json b/run-lx0qvogl/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..eb00e2c74767972611afda814b7e16ebe01fc84b --- /dev/null +++ b/run-lx0qvogl/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.919291398103805, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-lx0qvogl/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.562350394578191e-05, + "loss": 1.3781, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9621445536613464, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.1395, + "eval_samples_per_second": 443.272, + "eval_steps_per_second": 3.563, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 9.124700789156382e-05, + "loss": 0.9936, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001368705118373457, + "loss": 0.87, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8930155210643016, + "eval_f1": 0.8863523976196696, + "eval_loss": 0.951144814491272, + "eval_precision": 0.8885140313409388, + "eval_recall": 0.8930155210643016, + "eval_runtime": 7.8768, + "eval_samples_per_second": 458.054, + "eval_steps_per_second": 3.682, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00018249401578312763, + "loss": 0.8107, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9089018492475383, + "eval_loss": 0.8008581399917603, + "eval_precision": 0.9051878951577181, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.8797, + "eval_samples_per_second": 457.886, + "eval_steps_per_second": 3.68, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.00022811751972890952, + "loss": 0.796, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.0002737410236746914, + "loss": 0.7828, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8932926829268293, + "eval_f1": 0.8707019283703918, + "eval_loss": 0.8425114750862122, + "eval_precision": 0.8861667655804502, + "eval_recall": 0.8932926829268293, + "eval_runtime": 7.7019, + "eval_samples_per_second": 468.457, + "eval_steps_per_second": 3.765, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00031936452762047334, + "loss": 0.7937, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00033316978134108165, + "loss": 0.7667, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.6047671840354767, + "eval_f1": 0.6650227310603198, + "eval_loss": 1.220944881439209, + "eval_precision": 0.8763632011964403, + "eval_recall": 0.6047671840354767, + "eval_runtime": 8.2623, + "eval_samples_per_second": 436.681, + "eval_steps_per_second": 3.51, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00033201340055673435, + "loss": 0.7734, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8791574279379157, + "eval_f1": 0.8830665613028168, + "eval_loss": 0.8561235666275024, + "eval_precision": 0.8919972087302618, + "eval_recall": 0.8791574279379157, + "eval_runtime": 7.7476, + "eval_samples_per_second": 465.693, + "eval_steps_per_second": 3.743, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00032989413856437017, + "loss": 0.7703, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00032682433927046185, + "loss": 0.7597, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.908999186603767, + "eval_loss": 0.8065426349639893, + "eval_precision": 0.9048440615575271, + "eval_recall": 0.9151884700665188, + "eval_runtime": 7.8646, + "eval_samples_per_second": 458.765, + "eval_steps_per_second": 3.687, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00032282188310485264, + "loss": 0.7503, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.000317910082873957, + "loss": 0.7423, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.799889135254989, + "eval_f1": 0.8263020278336493, + "eval_loss": 0.9863446354866028, + "eval_precision": 0.884224920712868, + "eval_recall": 0.799889135254989, + "eval_runtime": 8.0924, + "eval_samples_per_second": 445.853, + "eval_steps_per_second": 3.584, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0003121175479724044, + "loss": 0.7391, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9021618625277162, + "eval_f1": 0.8870507643540921, + "eval_loss": 0.8355196714401245, + "eval_precision": 0.8969987604587748, + "eval_recall": 0.9021618625277162, + "eval_runtime": 7.2242, + "eval_samples_per_second": 499.435, + "eval_steps_per_second": 4.014, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0003054780177440432, + "loss": 0.7362, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0002980301649629129, + "loss": 0.7363, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9074532266216762, + "eval_loss": 0.8040934801101685, + "eval_precision": 0.9038499409721593, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.2736, + "eval_samples_per_second": 436.087, + "eval_steps_per_second": 3.505, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00028981737057883687, + "loss": 0.7254, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8589246119733924, + "eval_f1": 0.8713494645399834, + "eval_loss": 0.8952365517616272, + "eval_precision": 0.8941123985461512, + "eval_recall": 0.8589246119733924, + "eval_runtime": 7.8435, + "eval_samples_per_second": 459.997, + "eval_steps_per_second": 3.697, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0002808874710396549, + "loss": 0.7327, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00027129247966185143, + "loss": 0.7206, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8968957871396895, + "eval_f1": 0.8942101012427945, + "eval_loss": 0.8449982404708862, + "eval_precision": 0.9058863819587922, + "eval_recall": 0.8968957871396895, + "eval_runtime": 7.5677, + "eval_samples_per_second": 476.76, + "eval_steps_per_second": 3.832, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00026108828367249015, + "loss": 0.7137, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.000250334318687071, + "loss": 0.7136, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8871951219512195, + "eval_f1": 0.8898270475008228, + "eval_loss": 0.8576585054397583, + "eval_precision": 0.8969080950837554, + "eval_recall": 0.8871951219512195, + "eval_runtime": 7.7891, + "eval_samples_per_second": 463.212, + "eval_steps_per_second": 3.723, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00023909322251935603, + "loss": 0.7155, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9060421286031042, + "eval_f1": 0.8985044393952527, + "eval_loss": 0.824130654335022, + "eval_precision": 0.8974521897966055, + "eval_recall": 0.9060421286031042, + "eval_runtime": 8.0383, + "eval_samples_per_second": 448.853, + "eval_steps_per_second": 3.608, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00022743047033959112, + "loss": 0.7144, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00021541399330618903, + "loss": 0.7111, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9078723989223497, + "eval_loss": 0.819262683391571, + "eval_precision": 0.9058314035472377, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.8855, + "eval_samples_per_second": 457.548, + "eval_steps_per_second": 3.678, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00020311378289220318, + "loss": 0.7092, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00019060148321124132, + "loss": 0.7096, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.899719290953105, + "eval_loss": 0.8234288096427917, + "eval_precision": 0.9008266238288629, + "eval_recall": 0.9090909090909091, + "eval_runtime": 8.0847, + "eval_samples_per_second": 446.274, + "eval_steps_per_second": 3.587, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00017794997371736656, + "loss": 0.7067, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9027161862527716, + "eval_f1": 0.9018413144748951, + "eval_loss": 0.8304429650306702, + "eval_precision": 0.9029355611183134, + "eval_recall": 0.9027161862527716, + "eval_runtime": 8.1806, + "eval_samples_per_second": 441.046, + "eval_steps_per_second": 3.545, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0001652329447096047, + "loss": 0.7066, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00015252446811358283, + "loss": 0.7027, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9063090766601901, + "eval_loss": 0.817476749420166, + "eval_precision": 0.9053993216547231, + "eval_recall": 0.9129711751662971, + "eval_runtime": 7.0902, + "eval_samples_per_second": 508.868, + "eval_steps_per_second": 4.09, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00013989856604033644, + "loss": 0.699, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00012742877963526782, + "loss": 0.7009, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9043528433467256, + "eval_loss": 0.8197091221809387, + "eval_precision": 0.9005086256404702, + "eval_recall": 0.9118625277161863, + "eval_runtime": 7.5623, + "eval_samples_per_second": 477.101, + "eval_steps_per_second": 3.835, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.0001151877407285488, + "loss": 0.697, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.901439631090545, + "eval_loss": 0.8282056450843811, + "eval_precision": 0.902060147136351, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.9649, + "eval_samples_per_second": 452.987, + "eval_steps_per_second": 3.641, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00010324674878194386, + "loss": 0.6974, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 9.167535559617973e-05, + "loss": 0.6942, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9139313584199102, + "eval_loss": 0.8102231025695801, + "eval_precision": 0.9134147170386875, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.7971, + "eval_samples_per_second": 462.737, + "eval_steps_per_second": 3.719, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 8.05409601977848e-05, + "loss": 0.6928, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9135441143533733, + "eval_loss": 0.8085286617279053, + "eval_precision": 0.9113853190522553, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.9084, + "eval_samples_per_second": 456.222, + "eval_steps_per_second": 3.667, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 6.990841626503174e-05, + "loss": 0.6932, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 5.98396543795787e-05, + "loss": 0.6905, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9159633804214986, + "eval_loss": 0.8035997152328491, + "eval_precision": 0.9125647049088704, + "eval_recall": 0.9204545454545454, + "eval_runtime": 7.718, + "eval_samples_per_second": 467.476, + "eval_steps_per_second": 3.757, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 5.0393321304052365e-05, + "loss": 0.6879, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 4.1624438386646015e-05, + "loss": 0.6876, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9109036857849093, + "eval_loss": 0.8103654384613037, + "eval_precision": 0.9105360018186442, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.3571, + "eval_samples_per_second": 490.414, + "eval_steps_per_second": 3.942, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 3.358408108239676e-05, + "loss": 0.6901, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9115018835295827, + "eval_loss": 0.8098926544189453, + "eval_precision": 0.9089303954806013, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.4893, + "eval_samples_per_second": 481.753, + "eval_steps_per_second": 3.872, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 2.6319081457810925e-05, + "loss": 0.6847, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.9871755411636988e-05, + "loss": 0.6863, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9139224661820099, + "eval_loss": 0.8063018918037415, + "eval_precision": 0.9116659965561222, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.0274, + "eval_samples_per_second": 449.461, + "eval_steps_per_second": 3.613, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.427965620062186e-05, + "loss": 0.6872, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 9.575355705871114e-06, + "loss": 0.6856, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9109377257227362, + "eval_loss": 0.807707667350769, + "eval_precision": 0.9080131872801944, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.0138, + "eval_samples_per_second": 450.225, + "eval_steps_per_second": 3.619, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 5.786254713854789e-06, + "loss": 0.6861, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9158742508070002, + "eval_loss": 0.8018952012062073, + "eval_precision": 0.9133185591651838, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.8853, + "eval_samples_per_second": 457.562, + "eval_steps_per_second": 3.678, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 2.934423317100244e-06, + "loss": 0.6877, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.0364723641785006e-06, + "loss": 0.6876, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9226718403547672, + "eval_f1": 0.919291398103805, + "eval_loss": 0.802123486995697, + "eval_precision": 0.9170309160000029, + "eval_recall": 0.9226718403547672, + "eval_runtime": 7.6904, + "eval_samples_per_second": 469.157, + "eval_steps_per_second": 3.771, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4374499616677569, + "learning_rate": 0.0003334025288345601, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-lx0qvogl/checkpoint-616/training_args.bin b/run-lx0qvogl/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4d982a2e3dfad0f320127d1cbfca2689e87b3d7 --- /dev/null +++ b/run-lx0qvogl/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0b80f3cc0ed345dc4cdd824612d1b579dfbfe6bfb912bd06a77ea683a2d06f0 +size 4792 diff --git a/run-lx0qvogl/checkpoint-630/model.safetensors b/run-lx0qvogl/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4ea6e8ba788eb04ac6d06e49cb2659299a30622 --- /dev/null +++ b/run-lx0qvogl/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae4e21879598311b105f0f35a99bca6371c09231a5a18e098b1e346009e15154 +size 198025308 diff --git a/run-lx0qvogl/checkpoint-630/optimizer.pt b/run-lx0qvogl/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c3b4f86c6144286325d38b25cf704bd5a2991eb --- /dev/null +++ b/run-lx0qvogl/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7426027fe15e4b5ea9c35c53d01f54e1817adaee34af493433ca5496ea5be833 +size 395900602 diff --git a/run-lx0qvogl/checkpoint-630/rng_state.pth b/run-lx0qvogl/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-lx0qvogl/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-lx0qvogl/checkpoint-630/scheduler.pt b/run-lx0qvogl/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..17fbf7ea1aac40b91bd5ca9eccf33d096eb2ed80 --- /dev/null +++ b/run-lx0qvogl/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a96db911408a323652a63c959650e0f8cff021469db02d02b6f7e3351752a944 +size 1064 diff --git a/run-lx0qvogl/checkpoint-630/trainer_state.json b/run-lx0qvogl/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..246c98e829b97efcca54ece51b3191c508855ff9 --- /dev/null +++ b/run-lx0qvogl/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.919291398103805, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-lx0qvogl/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.562350394578191e-05, + "loss": 1.3781, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9621445536613464, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.1395, + "eval_samples_per_second": 443.272, + "eval_steps_per_second": 3.563, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 9.124700789156382e-05, + "loss": 0.9936, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001368705118373457, + "loss": 0.87, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8930155210643016, + "eval_f1": 0.8863523976196696, + "eval_loss": 0.951144814491272, + "eval_precision": 0.8885140313409388, + "eval_recall": 0.8930155210643016, + "eval_runtime": 7.8768, + "eval_samples_per_second": 458.054, + "eval_steps_per_second": 3.682, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00018249401578312763, + "loss": 0.8107, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9089018492475383, + "eval_loss": 0.8008581399917603, + "eval_precision": 0.9051878951577181, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.8797, + "eval_samples_per_second": 457.886, + "eval_steps_per_second": 3.68, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.00022811751972890952, + "loss": 0.796, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.0002737410236746914, + "loss": 0.7828, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8932926829268293, + "eval_f1": 0.8707019283703918, + "eval_loss": 0.8425114750862122, + "eval_precision": 0.8861667655804502, + "eval_recall": 0.8932926829268293, + "eval_runtime": 7.7019, + "eval_samples_per_second": 468.457, + "eval_steps_per_second": 3.765, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00031936452762047334, + "loss": 0.7937, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00033316978134108165, + "loss": 0.7667, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.6047671840354767, + "eval_f1": 0.6650227310603198, + "eval_loss": 1.220944881439209, + "eval_precision": 0.8763632011964403, + "eval_recall": 0.6047671840354767, + "eval_runtime": 8.2623, + "eval_samples_per_second": 436.681, + "eval_steps_per_second": 3.51, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00033201340055673435, + "loss": 0.7734, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8791574279379157, + "eval_f1": 0.8830665613028168, + "eval_loss": 0.8561235666275024, + "eval_precision": 0.8919972087302618, + "eval_recall": 0.8791574279379157, + "eval_runtime": 7.7476, + "eval_samples_per_second": 465.693, + "eval_steps_per_second": 3.743, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00032989413856437017, + "loss": 0.7703, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00032682433927046185, + "loss": 0.7597, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.908999186603767, + "eval_loss": 0.8065426349639893, + "eval_precision": 0.9048440615575271, + "eval_recall": 0.9151884700665188, + "eval_runtime": 7.8646, + "eval_samples_per_second": 458.765, + "eval_steps_per_second": 3.687, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00032282188310485264, + "loss": 0.7503, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.000317910082873957, + "loss": 0.7423, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.799889135254989, + "eval_f1": 0.8263020278336493, + "eval_loss": 0.9863446354866028, + "eval_precision": 0.884224920712868, + "eval_recall": 0.799889135254989, + "eval_runtime": 8.0924, + "eval_samples_per_second": 445.853, + "eval_steps_per_second": 3.584, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0003121175479724044, + "loss": 0.7391, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9021618625277162, + "eval_f1": 0.8870507643540921, + "eval_loss": 0.8355196714401245, + "eval_precision": 0.8969987604587748, + "eval_recall": 0.9021618625277162, + "eval_runtime": 7.2242, + "eval_samples_per_second": 499.435, + "eval_steps_per_second": 4.014, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0003054780177440432, + "loss": 0.7362, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0002980301649629129, + "loss": 0.7363, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9074532266216762, + "eval_loss": 0.8040934801101685, + "eval_precision": 0.9038499409721593, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.2736, + "eval_samples_per_second": 436.087, + "eval_steps_per_second": 3.505, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00028981737057883687, + "loss": 0.7254, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8589246119733924, + "eval_f1": 0.8713494645399834, + "eval_loss": 0.8952365517616272, + "eval_precision": 0.8941123985461512, + "eval_recall": 0.8589246119733924, + "eval_runtime": 7.8435, + "eval_samples_per_second": 459.997, + "eval_steps_per_second": 3.697, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0002808874710396549, + "loss": 0.7327, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00027129247966185143, + "loss": 0.7206, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8968957871396895, + "eval_f1": 0.8942101012427945, + "eval_loss": 0.8449982404708862, + "eval_precision": 0.9058863819587922, + "eval_recall": 0.8968957871396895, + "eval_runtime": 7.5677, + "eval_samples_per_second": 476.76, + "eval_steps_per_second": 3.832, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00026108828367249015, + "loss": 0.7137, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.000250334318687071, + "loss": 0.7136, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8871951219512195, + "eval_f1": 0.8898270475008228, + "eval_loss": 0.8576585054397583, + "eval_precision": 0.8969080950837554, + "eval_recall": 0.8871951219512195, + "eval_runtime": 7.7891, + "eval_samples_per_second": 463.212, + "eval_steps_per_second": 3.723, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00023909322251935603, + "loss": 0.7155, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9060421286031042, + "eval_f1": 0.8985044393952527, + "eval_loss": 0.824130654335022, + "eval_precision": 0.8974521897966055, + "eval_recall": 0.9060421286031042, + "eval_runtime": 8.0383, + "eval_samples_per_second": 448.853, + "eval_steps_per_second": 3.608, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00022743047033959112, + "loss": 0.7144, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00021541399330618903, + "loss": 0.7111, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9078723989223497, + "eval_loss": 0.819262683391571, + "eval_precision": 0.9058314035472377, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.8855, + "eval_samples_per_second": 457.548, + "eval_steps_per_second": 3.678, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00020311378289220318, + "loss": 0.7092, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00019060148321124132, + "loss": 0.7096, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.899719290953105, + "eval_loss": 0.8234288096427917, + "eval_precision": 0.9008266238288629, + "eval_recall": 0.9090909090909091, + "eval_runtime": 8.0847, + "eval_samples_per_second": 446.274, + "eval_steps_per_second": 3.587, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00017794997371736656, + "loss": 0.7067, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9027161862527716, + "eval_f1": 0.9018413144748951, + "eval_loss": 0.8304429650306702, + "eval_precision": 0.9029355611183134, + "eval_recall": 0.9027161862527716, + "eval_runtime": 8.1806, + "eval_samples_per_second": 441.046, + "eval_steps_per_second": 3.545, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0001652329447096047, + "loss": 0.7066, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00015252446811358283, + "loss": 0.7027, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9063090766601901, + "eval_loss": 0.817476749420166, + "eval_precision": 0.9053993216547231, + "eval_recall": 0.9129711751662971, + "eval_runtime": 7.0902, + "eval_samples_per_second": 508.868, + "eval_steps_per_second": 4.09, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00013989856604033644, + "loss": 0.699, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00012742877963526782, + "loss": 0.7009, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9043528433467256, + "eval_loss": 0.8197091221809387, + "eval_precision": 0.9005086256404702, + "eval_recall": 0.9118625277161863, + "eval_runtime": 7.5623, + "eval_samples_per_second": 477.101, + "eval_steps_per_second": 3.835, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.0001151877407285488, + "loss": 0.697, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.901439631090545, + "eval_loss": 0.8282056450843811, + "eval_precision": 0.902060147136351, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.9649, + "eval_samples_per_second": 452.987, + "eval_steps_per_second": 3.641, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00010324674878194386, + "loss": 0.6974, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 9.167535559617973e-05, + "loss": 0.6942, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9139313584199102, + "eval_loss": 0.8102231025695801, + "eval_precision": 0.9134147170386875, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.7971, + "eval_samples_per_second": 462.737, + "eval_steps_per_second": 3.719, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 8.05409601977848e-05, + "loss": 0.6928, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9135441143533733, + "eval_loss": 0.8085286617279053, + "eval_precision": 0.9113853190522553, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.9084, + "eval_samples_per_second": 456.222, + "eval_steps_per_second": 3.667, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 6.990841626503174e-05, + "loss": 0.6932, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 5.98396543795787e-05, + "loss": 0.6905, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9159633804214986, + "eval_loss": 0.8035997152328491, + "eval_precision": 0.9125647049088704, + "eval_recall": 0.9204545454545454, + "eval_runtime": 7.718, + "eval_samples_per_second": 467.476, + "eval_steps_per_second": 3.757, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 5.0393321304052365e-05, + "loss": 0.6879, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 4.1624438386646015e-05, + "loss": 0.6876, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9109036857849093, + "eval_loss": 0.8103654384613037, + "eval_precision": 0.9105360018186442, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.3571, + "eval_samples_per_second": 490.414, + "eval_steps_per_second": 3.942, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 3.358408108239676e-05, + "loss": 0.6901, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9115018835295827, + "eval_loss": 0.8098926544189453, + "eval_precision": 0.9089303954806013, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.4893, + "eval_samples_per_second": 481.753, + "eval_steps_per_second": 3.872, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 2.6319081457810925e-05, + "loss": 0.6847, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.9871755411636988e-05, + "loss": 0.6863, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9139224661820099, + "eval_loss": 0.8063018918037415, + "eval_precision": 0.9116659965561222, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.0274, + "eval_samples_per_second": 449.461, + "eval_steps_per_second": 3.613, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.427965620062186e-05, + "loss": 0.6872, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 9.575355705871114e-06, + "loss": 0.6856, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9109377257227362, + "eval_loss": 0.807707667350769, + "eval_precision": 0.9080131872801944, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.0138, + "eval_samples_per_second": 450.225, + "eval_steps_per_second": 3.619, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 5.786254713854789e-06, + "loss": 0.6861, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9158742508070002, + "eval_loss": 0.8018952012062073, + "eval_precision": 0.9133185591651838, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.8853, + "eval_samples_per_second": 457.562, + "eval_steps_per_second": 3.678, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 2.934423317100244e-06, + "loss": 0.6877, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.0364723641785006e-06, + "loss": 0.6876, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9226718403547672, + "eval_f1": 0.919291398103805, + "eval_loss": 0.802123486995697, + "eval_precision": 0.9170309160000029, + "eval_recall": 0.9226718403547672, + "eval_runtime": 7.6904, + "eval_samples_per_second": 469.157, + "eval_steps_per_second": 3.771, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 1.0345670773975161e-07, + "loss": 0.6834, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9144649576186598, + "eval_loss": 0.8118080496788025, + "eval_precision": 0.9129473805911944, + "eval_recall": 0.9174057649667405, + "eval_runtime": 7.507, + "eval_samples_per_second": 480.619, + "eval_steps_per_second": 3.863, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4374499616677569, + "learning_rate": 0.0003334025288345601, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-lx0qvogl/checkpoint-630/training_args.bin b/run-lx0qvogl/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4d982a2e3dfad0f320127d1cbfca2689e87b3d7 --- /dev/null +++ b/run-lx0qvogl/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0b80f3cc0ed345dc4cdd824612d1b579dfbfe6bfb912bd06a77ea683a2d06f0 +size 4792 diff --git a/run-maj9vrn9/checkpoint-552/model.safetensors b/run-maj9vrn9/checkpoint-552/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5107f0bd78c6e9b3acfaf57690b630a2d3af163 --- /dev/null +++ b/run-maj9vrn9/checkpoint-552/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa5fc38a790849ea3900e6496f5bdfd8c3db908d664399ed298107cf19bd02da +size 198025308 diff --git a/run-maj9vrn9/checkpoint-552/optimizer.pt b/run-maj9vrn9/checkpoint-552/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5125deb6a67f6d6329b56edadb0e1b3db9c824b4 --- /dev/null +++ b/run-maj9vrn9/checkpoint-552/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4af9d7dcfef31f409f4475fb8f3780c0eaf8ea588a144219ec138150d08534d +size 395900602 diff --git a/run-maj9vrn9/checkpoint-552/rng_state.pth b/run-maj9vrn9/checkpoint-552/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a07d02214d4d2a0bd650d84451df8b01ad9e2e1f --- /dev/null +++ b/run-maj9vrn9/checkpoint-552/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ea0e7f2a9ffdc1f2f52e0b770bd1a5190fc0a00c767b73b57597c52b6f4dee6 +size 14244 diff --git a/run-maj9vrn9/checkpoint-552/scheduler.pt b/run-maj9vrn9/checkpoint-552/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..67842bb110c02d2c64136df9c7fe088bfceea2bd --- /dev/null +++ b/run-maj9vrn9/checkpoint-552/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:175455616520b82ac0f813b7aec4a637fd3518892a547074a8155d461bcc8ec4 +size 1064 diff --git a/run-maj9vrn9/checkpoint-552/trainer_state.json b/run-maj9vrn9/checkpoint-552/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..918cc0a3ed19be5174300722c654e88ed74f450f --- /dev/null +++ b/run-maj9vrn9/checkpoint-552/trainer_state.json @@ -0,0 +1,592 @@ +{ + "best_metric": 0.9191897696512, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-maj9vrn9/checkpoint-552", + "epoch": 25.976470588235294, + "eval_steps": 500, + "global_step": 552, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.3834492333682036e-05, + "loss": 1.3762, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9557070136070251, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.4032, + "eval_samples_per_second": 429.358, + "eval_steps_per_second": 3.451, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 8.766898466736407e-05, + "loss": 0.9965, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001315034770010461, + "loss": 0.8671, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8949556541019955, + "eval_f1": 0.8897266968710041, + "eval_loss": 0.8883252739906311, + "eval_precision": 0.892273979145637, + "eval_recall": 0.8949556541019955, + "eval_runtime": 7.9625, + "eval_samples_per_second": 453.127, + "eval_steps_per_second": 3.642, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017533796933472814, + "loss": 0.8093, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9098818881944652, + "eval_loss": 0.8194078207015991, + "eval_precision": 0.9074569127170212, + "eval_recall": 0.9176829268292683, + "eval_runtime": 8.0671, + "eval_samples_per_second": 447.251, + "eval_steps_per_second": 3.595, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0002191724616684102, + "loss": 0.7959, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.0002630069540020922, + "loss": 0.7788, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9074279379157428, + "eval_f1": 0.8948089221065889, + "eval_loss": 0.8196240663528442, + "eval_precision": 0.8978819425558625, + "eval_recall": 0.9074279379157428, + "eval_runtime": 7.9168, + "eval_samples_per_second": 455.742, + "eval_steps_per_second": 3.663, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00030684144633577425, + "loss": 0.7894, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00032010536155586966, + "loss": 0.7655, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9032705099778271, + "eval_f1": 0.900275192580414, + "eval_loss": 0.8347921967506409, + "eval_precision": 0.90316955728625, + "eval_recall": 0.9032705099778271, + "eval_runtime": 8.0419, + "eval_samples_per_second": 448.649, + "eval_steps_per_second": 3.606, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00031899432535210665, + "loss": 0.7752, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8938470066518847, + "eval_f1": 0.8897136403718807, + "eval_loss": 0.8322799801826477, + "eval_precision": 0.8888992843689303, + "eval_recall": 0.8938470066518847, + "eval_runtime": 8.0983, + "eval_samples_per_second": 445.523, + "eval_steps_per_second": 3.581, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00031695816491892845, + "loss": 0.7692, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00031400874012738506, + "loss": 0.7566, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9077031480969429, + "eval_loss": 0.8084025382995605, + "eval_precision": 0.9042225327224769, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.3357, + "eval_samples_per_second": 432.838, + "eval_steps_per_second": 3.479, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0003101632302709788, + "loss": 0.7499, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0003054440340027211, + "loss": 0.7385, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.866130820399113, + "eval_f1": 0.8754539340406154, + "eval_loss": 0.8858188986778259, + "eval_precision": 0.8933476266277014, + "eval_recall": 0.866130820399113, + "eval_runtime": 8.0461, + "eval_samples_per_second": 448.417, + "eval_steps_per_second": 3.604, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00029987863887137746, + "loss": 0.7376, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.8973718948674221, + "eval_loss": 0.8236659169197083, + "eval_precision": 0.9006743803991627, + "eval_recall": 0.9090909090909091, + "eval_runtime": 8.2306, + "eval_samples_per_second": 438.362, + "eval_steps_per_second": 3.523, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00029349946121680234, + "loss": 0.7338, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00028634365735691423, + "loss": 0.7379, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8888580931263859, + "eval_f1": 0.8915931071439344, + "eval_loss": 0.844833254814148, + "eval_precision": 0.902326418651204, + "eval_recall": 0.8888580931263859, + "eval_runtime": 8.1172, + "eval_samples_per_second": 444.488, + "eval_steps_per_second": 3.573, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.000278452907166076, + "loss": 0.7296, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8899667405764967, + "eval_f1": 0.8915129874591473, + "eval_loss": 0.8468584418296814, + "eval_precision": 0.8976300322319868, + "eval_recall": 0.8899667405764967, + "eval_runtime": 7.9986, + "eval_samples_per_second": 451.076, + "eval_steps_per_second": 3.626, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00026987317130545464, + "loss": 0.7347, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00026065442351940355, + "loss": 0.7244, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8151330376940134, + "eval_f1": 0.8437268379883239, + "eval_loss": 0.9695701003074646, + "eval_precision": 0.9033513217105682, + "eval_recall": 0.8151330376940134, + "eval_runtime": 8.0861, + "eval_samples_per_second": 446.199, + "eval_steps_per_second": 3.586, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00025085035955713965, + "loss": 0.7158, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00024051808441513745, + "loss": 0.7175, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9049334811529933, + "eval_f1": 0.8924746711268441, + "eval_loss": 0.8340050578117371, + "eval_precision": 0.8948231915122807, + "eval_recall": 0.9049334811529933, + "eval_runtime": 7.9214, + "eval_samples_per_second": 455.477, + "eval_steps_per_second": 3.661, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00022971777972193686, + "loss": 0.713, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.9048763918164103, + "eval_loss": 0.8189122676849365, + "eval_precision": 0.9079378122443935, + "eval_recall": 0.9090909090909091, + "eval_runtime": 7.7194, + "eval_samples_per_second": 467.394, + "eval_steps_per_second": 3.757, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00021851235320272266, + "loss": 0.7143, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0002069670722654125, + "loss": 0.7112, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.908363397092284, + "eval_loss": 0.8125548362731934, + "eval_precision": 0.9048052439529913, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.7002, + "eval_samples_per_second": 468.556, + "eval_steps_per_second": 3.766, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00019514918384247852, + "loss": 0.7044, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00018312752270278074, + "loss": 0.7108, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9071173775841977, + "eval_loss": 0.81267911195755, + "eval_precision": 0.9048266961224766, + "eval_recall": 0.9154656319290465, + "eval_runtime": 8.1374, + "eval_samples_per_second": 443.385, + "eval_steps_per_second": 3.564, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0001709721105148479, + "loss": 0.7034, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8833148558758315, + "eval_f1": 0.886884051034353, + "eval_loss": 0.8552131056785583, + "eval_precision": 0.8964636643019317, + "eval_recall": 0.8833148558758315, + "eval_runtime": 7.6464, + "eval_samples_per_second": 471.857, + "eval_steps_per_second": 3.793, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00015875374799691406, + "loss": 0.7038, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00014654360252928166, + "loss": 0.7025, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9086861568598175, + "eval_loss": 0.8110377192497253, + "eval_precision": 0.9066207891178785, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.6555, + "eval_samples_per_second": 471.293, + "eval_steps_per_second": 3.788, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00013441279363101622, + "loss": 0.699, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.0001224319787154146, + "loss": 0.6992, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9074354654220043, + "eval_loss": 0.8157398700714111, + "eval_precision": 0.9070102033150522, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.8159, + "eval_samples_per_second": 461.621, + "eval_steps_per_second": 3.71, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.00011067094153706593, + "loss": 0.6958, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.9078195388986705, + "eval_loss": 0.8167528510093689, + "eval_precision": 0.9069128118146116, + "eval_recall": 0.9093680709534369, + "eval_runtime": 8.021, + "eval_samples_per_second": 449.82, + "eval_steps_per_second": 3.616, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 9.919818572764708e-05, + "loss": 0.6957, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 8.808053578795424e-05, + "loss": 0.696, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9135521677964489, + "eval_loss": 0.8060488104820251, + "eval_precision": 0.911974478268099, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.1539, + "eval_samples_per_second": 442.485, + "eval_steps_per_second": 3.557, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 7.73827478602418e-05, + "loss": 0.6935, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9121397659829285, + "eval_loss": 0.8111850619316101, + "eval_precision": 0.9106312851484463, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.7402, + "eval_samples_per_second": 466.141, + "eval_steps_per_second": 3.747, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 6.716713254797485e-05, + "loss": 0.6929, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 5.7493191979927635e-05, + "loss": 0.6922, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9215631929046563, + "eval_f1": 0.9162938378758622, + "eval_loss": 0.8042648434638977, + "eval_precision": 0.9144447441793278, + "eval_recall": 0.9215631929046563, + "eval_runtime": 8.2635, + "eval_samples_per_second": 436.621, + "eval_steps_per_second": 3.509, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 4.841727323259413e-05, + "loss": 0.6876, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 3.9992240129595434e-05, + "loss": 0.6887, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9101806778004129, + "eval_loss": 0.8114057183265686, + "eval_precision": 0.909145717062132, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.9189, + "eval_samples_per_second": 455.616, + "eval_steps_per_second": 3.662, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 3.226716532972875e-05, + "loss": 0.6897, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9127370978279324, + "eval_loss": 0.8117974400520325, + "eval_precision": 0.9099079535582795, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.8104, + "eval_samples_per_second": 461.951, + "eval_steps_per_second": 3.713, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 2.528704449712984e-05, + "loss": 0.6855, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.9092534218401262e-05, + "loss": 0.6855, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9191897696512, + "eval_loss": 0.8045447468757629, + "eval_precision": 0.9172816458838393, + "eval_recall": 0.9223946784922394, + "eval_runtime": 7.9585, + "eval_samples_per_second": 453.349, + "eval_steps_per_second": 3.644, + "step": 552 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4188958855648858, + "learning_rate": 0.00032032898243844563, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-maj9vrn9/checkpoint-552/training_args.bin b/run-maj9vrn9/checkpoint-552/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b8db8041fe298cb68856f23038619cd8a1bcc44 --- /dev/null +++ b/run-maj9vrn9/checkpoint-552/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41f6c63abfa42924d2f0655763aeecab36369f45771eacb4cf67f7cbba8e9fc7 +size 4792 diff --git a/run-maj9vrn9/checkpoint-630/model.safetensors b/run-maj9vrn9/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0263fe5975736b0b38b041999490198ba6a8456 --- /dev/null +++ b/run-maj9vrn9/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3574d51491c56264e1b09d8a6fec74545fd9d6b61c8a5ad5ab7a4e52083927cd +size 198025308 diff --git a/run-maj9vrn9/checkpoint-630/optimizer.pt b/run-maj9vrn9/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..eafc4ab9994aaa2d52abc03a363e97a8a7eaa99a --- /dev/null +++ b/run-maj9vrn9/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69b56f6a20f1cf318df1f82701c8b460173109169882a7b020f6845d5cfb695a +size 395900602 diff --git a/run-maj9vrn9/checkpoint-630/rng_state.pth b/run-maj9vrn9/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-maj9vrn9/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-maj9vrn9/checkpoint-630/scheduler.pt b/run-maj9vrn9/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a1a5118cc18622bbfd8e8fbe24874acf2e6dbff --- /dev/null +++ b/run-maj9vrn9/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2355a1b1561f8f72b1b6351e34430ecfb92eb31fcb6ff134fcf0ada58a7f3309 +size 1064 diff --git a/run-maj9vrn9/checkpoint-630/trainer_state.json b/run-maj9vrn9/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..67733bd126f79dfcfa1ccb774a2fb28d78f0bc8c --- /dev/null +++ b/run-maj9vrn9/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9191897696512, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-maj9vrn9/checkpoint-552", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.3834492333682036e-05, + "loss": 1.3762, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9557070136070251, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.4032, + "eval_samples_per_second": 429.358, + "eval_steps_per_second": 3.451, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 8.766898466736407e-05, + "loss": 0.9965, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001315034770010461, + "loss": 0.8671, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8949556541019955, + "eval_f1": 0.8897266968710041, + "eval_loss": 0.8883252739906311, + "eval_precision": 0.892273979145637, + "eval_recall": 0.8949556541019955, + "eval_runtime": 7.9625, + "eval_samples_per_second": 453.127, + "eval_steps_per_second": 3.642, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017533796933472814, + "loss": 0.8093, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9098818881944652, + "eval_loss": 0.8194078207015991, + "eval_precision": 0.9074569127170212, + "eval_recall": 0.9176829268292683, + "eval_runtime": 8.0671, + "eval_samples_per_second": 447.251, + "eval_steps_per_second": 3.595, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0002191724616684102, + "loss": 0.7959, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.0002630069540020922, + "loss": 0.7788, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9074279379157428, + "eval_f1": 0.8948089221065889, + "eval_loss": 0.8196240663528442, + "eval_precision": 0.8978819425558625, + "eval_recall": 0.9074279379157428, + "eval_runtime": 7.9168, + "eval_samples_per_second": 455.742, + "eval_steps_per_second": 3.663, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00030684144633577425, + "loss": 0.7894, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00032010536155586966, + "loss": 0.7655, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9032705099778271, + "eval_f1": 0.900275192580414, + "eval_loss": 0.8347921967506409, + "eval_precision": 0.90316955728625, + "eval_recall": 0.9032705099778271, + "eval_runtime": 8.0419, + "eval_samples_per_second": 448.649, + "eval_steps_per_second": 3.606, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00031899432535210665, + "loss": 0.7752, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8938470066518847, + "eval_f1": 0.8897136403718807, + "eval_loss": 0.8322799801826477, + "eval_precision": 0.8888992843689303, + "eval_recall": 0.8938470066518847, + "eval_runtime": 8.0983, + "eval_samples_per_second": 445.523, + "eval_steps_per_second": 3.581, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00031695816491892845, + "loss": 0.7692, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00031400874012738506, + "loss": 0.7566, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9077031480969429, + "eval_loss": 0.8084025382995605, + "eval_precision": 0.9042225327224769, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.3357, + "eval_samples_per_second": 432.838, + "eval_steps_per_second": 3.479, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0003101632302709788, + "loss": 0.7499, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0003054440340027211, + "loss": 0.7385, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.866130820399113, + "eval_f1": 0.8754539340406154, + "eval_loss": 0.8858188986778259, + "eval_precision": 0.8933476266277014, + "eval_recall": 0.866130820399113, + "eval_runtime": 8.0461, + "eval_samples_per_second": 448.417, + "eval_steps_per_second": 3.604, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00029987863887137746, + "loss": 0.7376, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.8973718948674221, + "eval_loss": 0.8236659169197083, + "eval_precision": 0.9006743803991627, + "eval_recall": 0.9090909090909091, + "eval_runtime": 8.2306, + "eval_samples_per_second": 438.362, + "eval_steps_per_second": 3.523, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00029349946121680234, + "loss": 0.7338, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00028634365735691423, + "loss": 0.7379, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8888580931263859, + "eval_f1": 0.8915931071439344, + "eval_loss": 0.844833254814148, + "eval_precision": 0.902326418651204, + "eval_recall": 0.8888580931263859, + "eval_runtime": 8.1172, + "eval_samples_per_second": 444.488, + "eval_steps_per_second": 3.573, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.000278452907166076, + "loss": 0.7296, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8899667405764967, + "eval_f1": 0.8915129874591473, + "eval_loss": 0.8468584418296814, + "eval_precision": 0.8976300322319868, + "eval_recall": 0.8899667405764967, + "eval_runtime": 7.9986, + "eval_samples_per_second": 451.076, + "eval_steps_per_second": 3.626, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00026987317130545464, + "loss": 0.7347, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00026065442351940355, + "loss": 0.7244, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8151330376940134, + "eval_f1": 0.8437268379883239, + "eval_loss": 0.9695701003074646, + "eval_precision": 0.9033513217105682, + "eval_recall": 0.8151330376940134, + "eval_runtime": 8.0861, + "eval_samples_per_second": 446.199, + "eval_steps_per_second": 3.586, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00025085035955713965, + "loss": 0.7158, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00024051808441513745, + "loss": 0.7175, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9049334811529933, + "eval_f1": 0.8924746711268441, + "eval_loss": 0.8340050578117371, + "eval_precision": 0.8948231915122807, + "eval_recall": 0.9049334811529933, + "eval_runtime": 7.9214, + "eval_samples_per_second": 455.477, + "eval_steps_per_second": 3.661, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00022971777972193686, + "loss": 0.713, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.9048763918164103, + "eval_loss": 0.8189122676849365, + "eval_precision": 0.9079378122443935, + "eval_recall": 0.9090909090909091, + "eval_runtime": 7.7194, + "eval_samples_per_second": 467.394, + "eval_steps_per_second": 3.757, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00021851235320272266, + "loss": 0.7143, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0002069670722654125, + "loss": 0.7112, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.908363397092284, + "eval_loss": 0.8125548362731934, + "eval_precision": 0.9048052439529913, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.7002, + "eval_samples_per_second": 468.556, + "eval_steps_per_second": 3.766, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00019514918384247852, + "loss": 0.7044, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00018312752270278074, + "loss": 0.7108, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9071173775841977, + "eval_loss": 0.81267911195755, + "eval_precision": 0.9048266961224766, + "eval_recall": 0.9154656319290465, + "eval_runtime": 8.1374, + "eval_samples_per_second": 443.385, + "eval_steps_per_second": 3.564, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0001709721105148479, + "loss": 0.7034, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8833148558758315, + "eval_f1": 0.886884051034353, + "eval_loss": 0.8552131056785583, + "eval_precision": 0.8964636643019317, + "eval_recall": 0.8833148558758315, + "eval_runtime": 7.6464, + "eval_samples_per_second": 471.857, + "eval_steps_per_second": 3.793, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00015875374799691406, + "loss": 0.7038, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00014654360252928166, + "loss": 0.7025, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9086861568598175, + "eval_loss": 0.8110377192497253, + "eval_precision": 0.9066207891178785, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.6555, + "eval_samples_per_second": 471.293, + "eval_steps_per_second": 3.788, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00013441279363101622, + "loss": 0.699, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.0001224319787154146, + "loss": 0.6992, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9074354654220043, + "eval_loss": 0.8157398700714111, + "eval_precision": 0.9070102033150522, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.8159, + "eval_samples_per_second": 461.621, + "eval_steps_per_second": 3.71, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.00011067094153706593, + "loss": 0.6958, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.9078195388986705, + "eval_loss": 0.8167528510093689, + "eval_precision": 0.9069128118146116, + "eval_recall": 0.9093680709534369, + "eval_runtime": 8.021, + "eval_samples_per_second": 449.82, + "eval_steps_per_second": 3.616, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 9.919818572764708e-05, + "loss": 0.6957, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 8.808053578795424e-05, + "loss": 0.696, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9135521677964489, + "eval_loss": 0.8060488104820251, + "eval_precision": 0.911974478268099, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.1539, + "eval_samples_per_second": 442.485, + "eval_steps_per_second": 3.557, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 7.73827478602418e-05, + "loss": 0.6935, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9121397659829285, + "eval_loss": 0.8111850619316101, + "eval_precision": 0.9106312851484463, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.7402, + "eval_samples_per_second": 466.141, + "eval_steps_per_second": 3.747, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 6.716713254797485e-05, + "loss": 0.6929, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 5.7493191979927635e-05, + "loss": 0.6922, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9215631929046563, + "eval_f1": 0.9162938378758622, + "eval_loss": 0.8042648434638977, + "eval_precision": 0.9144447441793278, + "eval_recall": 0.9215631929046563, + "eval_runtime": 8.2635, + "eval_samples_per_second": 436.621, + "eval_steps_per_second": 3.509, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 4.841727323259413e-05, + "loss": 0.6876, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 3.9992240129595434e-05, + "loss": 0.6887, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9101806778004129, + "eval_loss": 0.8114057183265686, + "eval_precision": 0.909145717062132, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.9189, + "eval_samples_per_second": 455.616, + "eval_steps_per_second": 3.662, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 3.226716532972875e-05, + "loss": 0.6897, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9127370978279324, + "eval_loss": 0.8117974400520325, + "eval_precision": 0.9099079535582795, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.8104, + "eval_samples_per_second": 461.951, + "eval_steps_per_second": 3.713, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 2.528704449712984e-05, + "loss": 0.6855, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.9092534218401262e-05, + "loss": 0.6855, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9191897696512, + "eval_loss": 0.8045447468757629, + "eval_precision": 0.9172816458838393, + "eval_recall": 0.9223946784922394, + "eval_runtime": 7.9585, + "eval_samples_per_second": 453.349, + "eval_steps_per_second": 3.644, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.3719715193239672e-05, + "loss": 0.6877, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 9.199882077888756e-06, + "loss": 0.6857, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9116415398539909, + "eval_loss": 0.8081678748130798, + "eval_precision": 0.9092406509970001, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.8264, + "eval_samples_per_second": 461.001, + "eval_steps_per_second": 3.705, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 5.559361205500947e-06, + "loss": 0.687, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9212860310421286, + "eval_f1": 0.9174446716856375, + "eval_loss": 0.8016456365585327, + "eval_precision": 0.9148857667777416, + "eval_recall": 0.9212860310421286, + "eval_runtime": 7.7221, + "eval_samples_per_second": 467.232, + "eval_steps_per_second": 3.755, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 2.8193572451179687e-06, + "loss": 0.6876, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 9.958296924244963e-07, + "loss": 0.6879, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9218403547671841, + "eval_f1": 0.9178938004403995, + "eval_loss": 0.8014368414878845, + "eval_precision": 0.9155525867913387, + "eval_recall": 0.9218403547671841, + "eval_runtime": 7.6753, + "eval_samples_per_second": 470.082, + "eval_steps_per_second": 3.778, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 9.939991166998919e-08, + "loss": 0.6823, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9151011668864663, + "eval_loss": 0.8091979026794434, + "eval_precision": 0.9129090804088738, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.7013, + "eval_samples_per_second": 468.49, + "eval_steps_per_second": 3.766, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4188958855648858, + "learning_rate": 0.00032032898243844563, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-maj9vrn9/checkpoint-630/training_args.bin b/run-maj9vrn9/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b8db8041fe298cb68856f23038619cd8a1bcc44 --- /dev/null +++ b/run-maj9vrn9/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41f6c63abfa42924d2f0655763aeecab36369f45771eacb4cf67f7cbba8e9fc7 +size 4792 diff --git a/run-mrgxuzuz/checkpoint-1232/model.safetensors b/run-mrgxuzuz/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..249a0b4b714aad18cb226478162fdea7ea7d61ba --- /dev/null +++ b/run-mrgxuzuz/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fddeed2d0efc1a074e59053bc565230392c030d89ed6e27b906f96237b50807a +size 198025308 diff --git a/run-mrgxuzuz/checkpoint-1232/optimizer.pt b/run-mrgxuzuz/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..23632ee16692787666caa54b3672f1d44f34c116 --- /dev/null +++ b/run-mrgxuzuz/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54e22624f660458d11552707f1ca5e1628eeab07d615a58239e6dbca84708b70 +size 395900602 diff --git a/run-mrgxuzuz/checkpoint-1232/rng_state.pth b/run-mrgxuzuz/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-mrgxuzuz/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-mrgxuzuz/checkpoint-1232/scheduler.pt b/run-mrgxuzuz/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..59cfbd3d0d218d779cbaf11f4c677e50851825dd --- /dev/null +++ b/run-mrgxuzuz/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c666b38654a49d8f72b7c1ba964a19a6a80dc88e5c02c65c4c9db09ccc810533 +size 1064 diff --git a/run-mrgxuzuz/checkpoint-1232/trainer_state.json b/run-mrgxuzuz/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..780604cf795e94498a2b75a501b83e2f8067a957 --- /dev/null +++ b/run-mrgxuzuz/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.924889135254989, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-mrgxuzuz/checkpoint-765", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.631957386919613e-06, + "loss": 1.4975, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8245565410199557, + "eval_loss": 1.2128942012786865, + "eval_runtime": 6.9648, + "eval_samples_per_second": 518.035, + "eval_steps_per_second": 8.184, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 9.263914773839225e-06, + "loss": 1.2573, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 1.3895872160758839e-05, + "loss": 0.9965, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8497782705099778, + "eval_loss": 0.9234672784805298, + "eval_runtime": 6.8345, + "eval_samples_per_second": 527.906, + "eval_steps_per_second": 8.34, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 1.852782954767845e-05, + "loss": 0.9022, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8952328159645233, + "eval_loss": 0.8587346076965332, + "eval_runtime": 6.864, + "eval_samples_per_second": 525.64, + "eval_steps_per_second": 8.304, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 2.3159786934598065e-05, + "loss": 0.8645, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 2.7791744321517678e-05, + "loss": 0.8329, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9099223946784922, + "eval_loss": 0.8256401419639587, + "eval_runtime": 6.7236, + "eval_samples_per_second": 536.616, + "eval_steps_per_second": 8.478, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 3.242370170843729e-05, + "loss": 0.8118, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 3.70556590953569e-05, + "loss": 0.7988, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8035308122634888, + "eval_runtime": 6.9331, + "eval_samples_per_second": 520.404, + "eval_steps_per_second": 8.221, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 4.168761648227652e-05, + "loss": 0.795, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8094593286514282, + "eval_runtime": 6.7592, + "eval_samples_per_second": 533.79, + "eval_steps_per_second": 8.433, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 4.631957386919613e-05, + "loss": 0.7829, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 5.0951531256115746e-05, + "loss": 0.7754, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.7991108298301697, + "eval_runtime": 6.8593, + "eval_samples_per_second": 526.004, + "eval_steps_per_second": 8.31, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 5.5583488643035356e-05, + "loss": 0.7806, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 6.021544602995497e-05, + "loss": 0.7664, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8119838237762451, + "eval_runtime": 6.7401, + "eval_samples_per_second": 535.302, + "eval_steps_per_second": 8.457, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 6.484740341687458e-05, + "loss": 0.7582, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.8043816685676575, + "eval_runtime": 6.575, + "eval_samples_per_second": 548.741, + "eval_steps_per_second": 8.669, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 6.94793608037942e-05, + "loss": 0.7673, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 7.41113181907138e-05, + "loss": 0.7552, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8017526865005493, + "eval_runtime": 6.9019, + "eval_samples_per_second": 522.757, + "eval_steps_per_second": 8.259, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 7.526261054058212e-05, + "loss": 0.7448, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8017557859420776, + "eval_runtime": 6.8249, + "eval_samples_per_second": 528.649, + "eval_steps_per_second": 8.352, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 7.482220102905957e-05, + "loss": 0.7433, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 7.402834157299292e-05, + "loss": 0.7379, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9082594235033259, + "eval_loss": 0.8209928274154663, + "eval_runtime": 6.8309, + "eval_samples_per_second": 528.19, + "eval_steps_per_second": 8.344, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 7.28885864763199e-05, + "loss": 0.7373, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 7.14137815585984e-05, + "loss": 0.7376, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.7964752316474915, + "eval_runtime": 6.9023, + "eval_samples_per_second": 522.721, + "eval_steps_per_second": 8.258, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 6.961796094704345e-05, + "loss": 0.734, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8214039206504822, + "eval_runtime": 6.5898, + "eval_samples_per_second": 547.515, + "eval_steps_per_second": 8.65, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 6.751821352888016e-05, + "loss": 0.7351, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 6.513452033484135e-05, + "loss": 0.7235, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8833148558758315, + "eval_loss": 0.8590376973152161, + "eval_runtime": 6.6898, + "eval_samples_per_second": 539.33, + "eval_steps_per_second": 8.52, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 6.248956440125596e-05, + "loss": 0.7229, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 5.960851492006531e-05, + "loss": 0.7195, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.891629711751663, + "eval_loss": 0.8380500674247742, + "eval_runtime": 6.6615, + "eval_samples_per_second": 541.621, + "eval_steps_per_second": 8.557, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 5.651878773077874e-05, + "loss": 0.7184, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8142021298408508, + "eval_runtime": 6.6641, + "eval_samples_per_second": 541.405, + "eval_steps_per_second": 8.553, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 5.324978443350835e-05, + "loss": 0.7185, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 4.983261260566252e-05, + "loss": 0.7146, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.924889135254989, + "eval_loss": 0.7931919097900391, + "eval_runtime": 6.9032, + "eval_samples_per_second": 522.659, + "eval_steps_per_second": 8.257, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 4.629978978469408e-05, + "loss": 0.709, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 4.268493403377998e-05, + "loss": 0.7137, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9093680709534369, + "eval_loss": 0.8139432072639465, + "eval_runtime": 6.9218, + "eval_samples_per_second": 521.25, + "eval_steps_per_second": 8.235, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 3.9022444034985084e-05, + "loss": 0.7151, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8024135828018188, + "eval_runtime": 6.7198, + "eval_samples_per_second": 536.918, + "eval_steps_per_second": 8.482, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 3.534717175411804e-05, + "loss": 0.7077, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 3.1694090792175026e-05, + "loss": 0.7048, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8054723143577576, + "eval_runtime": 6.6421, + "eval_samples_per_second": 543.198, + "eval_steps_per_second": 8.582, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 2.8097963579312558e-05, + "loss": 0.7037, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8053677082061768, + "eval_runtime": 6.7953, + "eval_samples_per_second": 530.957, + "eval_steps_per_second": 8.388, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 2.4593010578305274e-05, + "loss": 0.706, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 2.12125846453227e-05, + "loss": 0.7035, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.7991043925285339, + "eval_runtime": 6.7317, + "eval_samples_per_second": 535.971, + "eval_steps_per_second": 8.467, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 1.7988853646781905e-05, + "loss": 0.7031, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 1.495249435246913e-05, + "loss": 0.7033, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9221175166297118, + "eval_loss": 0.7974876761436462, + "eval_runtime": 6.7983, + "eval_samples_per_second": 530.717, + "eval_steps_per_second": 8.384, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.2132400517819372e-05, + "loss": 0.6977, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.7997455596923828, + "eval_runtime": 6.6493, + "eval_samples_per_second": 542.613, + "eval_steps_per_second": 8.572, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 9.555407933219724e-06, + "loss": 0.6969, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 7.246039056745749e-06, + "loss": 0.6973, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9229490022172949, + "eval_loss": 0.8020610809326172, + "eval_runtime": 6.9768, + "eval_samples_per_second": 517.145, + "eval_steps_per_second": 8.17, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 5.226269660385627e-06, + "loss": 0.6973, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.5153197103280903e-06, + "loss": 0.7022, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9226718403547672, + "eval_loss": 0.7975735664367676, + "eval_runtime": 6.9057, + "eval_samples_per_second": 522.465, + "eval_steps_per_second": 8.254, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.129470471280839e-06, + "loss": 0.6973, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9235033259423503, + "eval_loss": 0.7965211272239685, + "eval_runtime": 6.3521, + "eval_samples_per_second": 568.0, + "eval_steps_per_second": 8.973, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.0819095752404545e-06, + "loss": 0.6939, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 3.8260552902698027e-07, + "loss": 0.6946, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.7995522618293762, + "eval_runtime": 6.8135, + "eval_samples_per_second": 529.538, + "eval_steps_per_second": 8.366, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 7.535838364103832e-05, + "metric": "eval/loss", + "warmup_ratio": 0.3355123721844324 + } +} diff --git a/run-mrgxuzuz/checkpoint-1232/training_args.bin b/run-mrgxuzuz/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b8c3764fe79631e22db1a7377a79478bd2d4d4bc --- /dev/null +++ b/run-mrgxuzuz/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea25aea50bfb28183f3389148d87e35bb2df4cf90d5c30fa06302d7e257b9ec4 +size 4792 diff --git a/run-mrgxuzuz/checkpoint-1260/model.safetensors b/run-mrgxuzuz/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..559ce2851f21371ff6497cb3ff5ae854ffd157ff --- /dev/null +++ b/run-mrgxuzuz/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f11c7aee769f1c1758a7362dfe500e9f34d30e65c20116c7bb051c2772fa8918 +size 198025308 diff --git a/run-mrgxuzuz/checkpoint-1260/optimizer.pt b/run-mrgxuzuz/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ffb67011b52e0931665e2771949056e74a654ea2 --- /dev/null +++ b/run-mrgxuzuz/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c628c661121ee06f2d98ad9c033b44f36d1ed32bc13c7ca78854479b643c0ae3 +size 395900602 diff --git a/run-mrgxuzuz/checkpoint-1260/rng_state.pth b/run-mrgxuzuz/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-mrgxuzuz/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-mrgxuzuz/checkpoint-1260/scheduler.pt b/run-mrgxuzuz/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d108781edc9d98fe9be86d595a8c898f5bc0b06 --- /dev/null +++ b/run-mrgxuzuz/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:462fe4d99fc6e48c5af6706d39be65ce5598f3981b644311956aff48700a92ff +size 1064 diff --git a/run-mrgxuzuz/checkpoint-1260/trainer_state.json b/run-mrgxuzuz/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..febf733ca111cf93d7f8de6a5c29f04fc9e8b1ea --- /dev/null +++ b/run-mrgxuzuz/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9257206208425721, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-mrgxuzuz/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.631957386919613e-06, + "loss": 1.4975, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8245565410199557, + "eval_loss": 1.2128942012786865, + "eval_runtime": 6.9648, + "eval_samples_per_second": 518.035, + "eval_steps_per_second": 8.184, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 9.263914773839225e-06, + "loss": 1.2573, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 1.3895872160758839e-05, + "loss": 0.9965, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8497782705099778, + "eval_loss": 0.9234672784805298, + "eval_runtime": 6.8345, + "eval_samples_per_second": 527.906, + "eval_steps_per_second": 8.34, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 1.852782954767845e-05, + "loss": 0.9022, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8952328159645233, + "eval_loss": 0.8587346076965332, + "eval_runtime": 6.864, + "eval_samples_per_second": 525.64, + "eval_steps_per_second": 8.304, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 2.3159786934598065e-05, + "loss": 0.8645, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 2.7791744321517678e-05, + "loss": 0.8329, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9099223946784922, + "eval_loss": 0.8256401419639587, + "eval_runtime": 6.7236, + "eval_samples_per_second": 536.616, + "eval_steps_per_second": 8.478, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 3.242370170843729e-05, + "loss": 0.8118, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 3.70556590953569e-05, + "loss": 0.7988, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8035308122634888, + "eval_runtime": 6.9331, + "eval_samples_per_second": 520.404, + "eval_steps_per_second": 8.221, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 4.168761648227652e-05, + "loss": 0.795, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8094593286514282, + "eval_runtime": 6.7592, + "eval_samples_per_second": 533.79, + "eval_steps_per_second": 8.433, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 4.631957386919613e-05, + "loss": 0.7829, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 5.0951531256115746e-05, + "loss": 0.7754, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.7991108298301697, + "eval_runtime": 6.8593, + "eval_samples_per_second": 526.004, + "eval_steps_per_second": 8.31, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 5.5583488643035356e-05, + "loss": 0.7806, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 6.021544602995497e-05, + "loss": 0.7664, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8119838237762451, + "eval_runtime": 6.7401, + "eval_samples_per_second": 535.302, + "eval_steps_per_second": 8.457, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 6.484740341687458e-05, + "loss": 0.7582, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.8043816685676575, + "eval_runtime": 6.575, + "eval_samples_per_second": 548.741, + "eval_steps_per_second": 8.669, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 6.94793608037942e-05, + "loss": 0.7673, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 7.41113181907138e-05, + "loss": 0.7552, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8017526865005493, + "eval_runtime": 6.9019, + "eval_samples_per_second": 522.757, + "eval_steps_per_second": 8.259, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 7.526261054058212e-05, + "loss": 0.7448, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8017557859420776, + "eval_runtime": 6.8249, + "eval_samples_per_second": 528.649, + "eval_steps_per_second": 8.352, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 7.482220102905957e-05, + "loss": 0.7433, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 7.402834157299292e-05, + "loss": 0.7379, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9082594235033259, + "eval_loss": 0.8209928274154663, + "eval_runtime": 6.8309, + "eval_samples_per_second": 528.19, + "eval_steps_per_second": 8.344, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 7.28885864763199e-05, + "loss": 0.7373, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 7.14137815585984e-05, + "loss": 0.7376, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.7964752316474915, + "eval_runtime": 6.9023, + "eval_samples_per_second": 522.721, + "eval_steps_per_second": 8.258, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 6.961796094704345e-05, + "loss": 0.734, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8214039206504822, + "eval_runtime": 6.5898, + "eval_samples_per_second": 547.515, + "eval_steps_per_second": 8.65, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 6.751821352888016e-05, + "loss": 0.7351, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 6.513452033484135e-05, + "loss": 0.7235, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8833148558758315, + "eval_loss": 0.8590376973152161, + "eval_runtime": 6.6898, + "eval_samples_per_second": 539.33, + "eval_steps_per_second": 8.52, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 6.248956440125596e-05, + "loss": 0.7229, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 5.960851492006531e-05, + "loss": 0.7195, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.891629711751663, + "eval_loss": 0.8380500674247742, + "eval_runtime": 6.6615, + "eval_samples_per_second": 541.621, + "eval_steps_per_second": 8.557, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 5.651878773077874e-05, + "loss": 0.7184, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8142021298408508, + "eval_runtime": 6.6641, + "eval_samples_per_second": 541.405, + "eval_steps_per_second": 8.553, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 5.324978443350835e-05, + "loss": 0.7185, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 4.983261260566252e-05, + "loss": 0.7146, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.924889135254989, + "eval_loss": 0.7931919097900391, + "eval_runtime": 6.9032, + "eval_samples_per_second": 522.659, + "eval_steps_per_second": 8.257, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 4.629978978469408e-05, + "loss": 0.709, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 4.268493403377998e-05, + "loss": 0.7137, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9093680709534369, + "eval_loss": 0.8139432072639465, + "eval_runtime": 6.9218, + "eval_samples_per_second": 521.25, + "eval_steps_per_second": 8.235, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 3.9022444034985084e-05, + "loss": 0.7151, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8024135828018188, + "eval_runtime": 6.7198, + "eval_samples_per_second": 536.918, + "eval_steps_per_second": 8.482, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 3.534717175411804e-05, + "loss": 0.7077, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 3.1694090792175026e-05, + "loss": 0.7048, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8054723143577576, + "eval_runtime": 6.6421, + "eval_samples_per_second": 543.198, + "eval_steps_per_second": 8.582, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 2.8097963579312558e-05, + "loss": 0.7037, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8053677082061768, + "eval_runtime": 6.7953, + "eval_samples_per_second": 530.957, + "eval_steps_per_second": 8.388, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 2.4593010578305274e-05, + "loss": 0.706, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 2.12125846453227e-05, + "loss": 0.7035, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.7991043925285339, + "eval_runtime": 6.7317, + "eval_samples_per_second": 535.971, + "eval_steps_per_second": 8.467, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 1.7988853646781905e-05, + "loss": 0.7031, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 1.495249435246913e-05, + "loss": 0.7033, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9221175166297118, + "eval_loss": 0.7974876761436462, + "eval_runtime": 6.7983, + "eval_samples_per_second": 530.717, + "eval_steps_per_second": 8.384, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.2132400517819372e-05, + "loss": 0.6977, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.7997455596923828, + "eval_runtime": 6.6493, + "eval_samples_per_second": 542.613, + "eval_steps_per_second": 8.572, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 9.555407933219724e-06, + "loss": 0.6969, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 7.246039056745749e-06, + "loss": 0.6973, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9229490022172949, + "eval_loss": 0.8020610809326172, + "eval_runtime": 6.9768, + "eval_samples_per_second": 517.145, + "eval_steps_per_second": 8.17, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 5.226269660385627e-06, + "loss": 0.6973, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.5153197103280903e-06, + "loss": 0.7022, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9226718403547672, + "eval_loss": 0.7975735664367676, + "eval_runtime": 6.9057, + "eval_samples_per_second": 522.465, + "eval_steps_per_second": 8.254, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.129470471280839e-06, + "loss": 0.6973, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9235033259423503, + "eval_loss": 0.7965211272239685, + "eval_runtime": 6.3521, + "eval_samples_per_second": 568.0, + "eval_steps_per_second": 8.973, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.0819095752404545e-06, + "loss": 0.6939, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 3.8260552902698027e-07, + "loss": 0.6946, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.7995522618293762, + "eval_runtime": 6.8135, + "eval_samples_per_second": 529.538, + "eval_steps_per_second": 8.366, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 3.8212854759656806e-08, + "loss": 0.6972, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9257206208425721, + "eval_loss": 0.7965267300605774, + "eval_runtime": 7.0035, + "eval_samples_per_second": 515.172, + "eval_steps_per_second": 8.139, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 7.535838364103832e-05, + "metric": "eval/loss", + "warmup_ratio": 0.3355123721844324 + } +} diff --git a/run-mrgxuzuz/checkpoint-1260/training_args.bin b/run-mrgxuzuz/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b8c3764fe79631e22db1a7377a79478bd2d4d4bc --- /dev/null +++ b/run-mrgxuzuz/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea25aea50bfb28183f3389148d87e35bb2df4cf90d5c30fa06302d7e257b9ec4 +size 4792 diff --git a/run-mum0q0dq/checkpoint-1190/model.safetensors b/run-mum0q0dq/checkpoint-1190/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2fe501f86ef36135968d658c6bb27a9b3a4d2ce0 --- /dev/null +++ b/run-mum0q0dq/checkpoint-1190/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88c4df880ab26f88a2f58685670916c581e0e15be7a4cce514755a51476fe6cb +size 198025308 diff --git a/run-mum0q0dq/checkpoint-1190/optimizer.pt b/run-mum0q0dq/checkpoint-1190/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f02bd0fb659d51ced24de42428b2bc4af0cb59d0 --- /dev/null +++ b/run-mum0q0dq/checkpoint-1190/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f851d35f3b67f25b20027d039f874c214bc4d89761b28111607be172237f8b96 +size 395900602 diff --git a/run-mum0q0dq/checkpoint-1190/rng_state.pth b/run-mum0q0dq/checkpoint-1190/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fa10329c52a02715f873c9a50812feb7d32c8cd3 --- /dev/null +++ b/run-mum0q0dq/checkpoint-1190/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43f5febab37757cc5268c77056c937c9c526090d892464a785cf2004d48e5d85 +size 14244 diff --git a/run-mum0q0dq/checkpoint-1190/scheduler.pt b/run-mum0q0dq/checkpoint-1190/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2080bb911a16e53456130c150a8ece7bc3dcbac --- /dev/null +++ b/run-mum0q0dq/checkpoint-1190/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48914c981042d4b41f4ea16ddd8fdde654febd3b1efa533e38ac81a305b4830c +size 1064 diff --git a/run-mum0q0dq/checkpoint-1190/trainer_state.json b/run-mum0q0dq/checkpoint-1190/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0f32fffb307517be9dc50a19b3db8b5c1a37a2b8 --- /dev/null +++ b/run-mum0q0dq/checkpoint-1190/trainer_state.json @@ -0,0 +1,549 @@ +{ + "best_metric": 0.9185144124168514, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-mum0q0dq/checkpoint-1190", + "epoch": 28.0, + "eval_steps": 500, + "global_step": 1190, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 0.00012805302923822064, + "loss": 1.1577, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8949556541019955, + "eval_loss": 0.8690281510353088, + "eval_runtime": 6.8017, + "eval_samples_per_second": 530.458, + "eval_steps_per_second": 8.38, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002561060584764413, + "loss": 0.8454, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00038415908771466194, + "loss": 0.8194, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8761086474501109, + "eval_loss": 0.8702611327171326, + "eval_runtime": 6.9556, + "eval_samples_per_second": 518.719, + "eval_steps_per_second": 8.195, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005122121169528826, + "loss": 0.8194, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8747228381374723, + "eval_loss": 0.8597623109817505, + "eval_runtime": 6.9396, + "eval_samples_per_second": 519.912, + "eval_steps_per_second": 8.214, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0006402651461911032, + "loss": 0.8266, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0006593441807009602, + "loss": 0.8175, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8187361419068736, + "eval_loss": 0.958842396736145, + "eval_runtime": 6.7912, + "eval_samples_per_second": 531.273, + "eval_steps_per_second": 8.393, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0006570108838555026, + "loss": 0.8266, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0006529574452191814, + "loss": 0.8191, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8508869179600886, + "eval_loss": 0.9678304195404053, + "eval_runtime": 6.8224, + "eval_samples_per_second": 528.846, + "eval_steps_per_second": 8.355, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0006472051855582233, + "loss": 0.8191, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.7907427937915743, + "eval_loss": 1.027077317237854, + "eval_runtime": 6.926, + "eval_samples_per_second": 520.933, + "eval_steps_per_second": 8.23, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.000639784361302911, + "loss": 0.8119, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0006307340054011563, + "loss": 0.8162, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8531042128603105, + "eval_loss": 0.9406482577323914, + "eval_runtime": 6.9509, + "eval_samples_per_second": 519.072, + "eval_steps_per_second": 8.2, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0006201017220082855, + "loss": 0.8053, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0006079434360929515, + "loss": 0.7924, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8957871396895787, + "eval_loss": 0.8433511853218079, + "eval_runtime": 6.8773, + "eval_samples_per_second": 524.624, + "eval_steps_per_second": 8.288, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0005943230992762283, + "loss": 0.7899, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8910753880266076, + "eval_loss": 0.8416498303413391, + "eval_runtime": 6.8792, + "eval_samples_per_second": 524.482, + "eval_steps_per_second": 8.286, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0005793123534511479, + "loss": 0.7945, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0005629901539520169, + "loss": 0.781, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8855321507760532, + "eval_loss": 0.8604955673217773, + "eval_runtime": 6.766, + "eval_samples_per_second": 533.258, + "eval_steps_per_second": 8.425, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0005454423542556109, + "loss": 0.7659, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8952328159645233, + "eval_loss": 0.8337368369102478, + "eval_runtime": 6.6704, + "eval_samples_per_second": 540.896, + "eval_steps_per_second": 8.545, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0005267612543986848, + "loss": 0.7809, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0005070451154870894, + "loss": 0.7615, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.891629711751663, + "eval_loss": 0.8505132794380188, + "eval_runtime": 6.7914, + "eval_samples_per_second": 531.262, + "eval_steps_per_second": 8.393, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0004863976428501364, + "loss": 0.7648, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0004649274405587783, + "loss": 0.757, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8938470066518847, + "eval_loss": 0.8380396962165833, + "eval_runtime": 6.9482, + "eval_samples_per_second": 519.269, + "eval_steps_per_second": 8.204, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.000442747440176796, + "loss": 0.7618, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9024390243902439, + "eval_loss": 0.8248789310455322, + "eval_runtime": 6.7846, + "eval_samples_per_second": 531.789, + "eval_steps_per_second": 8.401, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.000419974306749717, + "loss": 0.7573, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00039672782515591396, + "loss": 0.7408, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8968957871396895, + "eval_loss": 0.8384109735488892, + "eval_runtime": 6.7455, + "eval_samples_per_second": 534.875, + "eval_steps_per_second": 8.45, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0003731302700476317, + "loss": 0.7407, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0003493057626959984, + "loss": 0.7447, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8292194604873657, + "eval_runtime": 6.7571, + "eval_samples_per_second": 533.958, + "eval_steps_per_second": 8.436, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0003253796181229696, + "loss": 0.7396, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8235617876052856, + "eval_runtime": 6.925, + "eval_samples_per_second": 521.015, + "eval_steps_per_second": 8.231, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0003014776859542366, + "loss": 0.7305, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00027772568846016153, + "loss": 0.7254, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9077050997782705, + "eval_loss": 0.8225652575492859, + "eval_runtime": 7.0224, + "eval_samples_per_second": 513.782, + "eval_steps_per_second": 8.117, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00025424855926659137, + "loss": 0.7212, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00023116978621387982, + "loss": 0.7176, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.8167243003845215, + "eval_runtime": 6.9369, + "eval_samples_per_second": 520.118, + "eval_steps_per_second": 8.217, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0002086107618206265, + "loss": 0.7098, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8160545229911804, + "eval_runtime": 6.7153, + "eval_samples_per_second": 537.283, + "eval_steps_per_second": 8.488, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0001866901447686423, + "loss": 0.7113, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00016552323576767973, + "loss": 0.7144, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.816665530204773, + "eval_runtime": 6.7754, + "eval_samples_per_second": 532.511, + "eval_steps_per_second": 8.413, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.0001452213710828307, + "loss": 0.7079, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9118625277161863, + "eval_loss": 0.8123109340667725, + "eval_runtime": 7.011, + "eval_samples_per_second": 514.623, + "eval_steps_per_second": 8.13, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0001258913369145893, + "loss": 0.7071, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.0001076348077118933, + "loss": 0.7043, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.814229428768158, + "eval_runtime": 6.7528, + "eval_samples_per_second": 534.296, + "eval_steps_per_second": 8.441, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 9.054781137257503e-05, + "loss": 0.7025, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 7.472022414422346e-05, + "loss": 0.6979, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8085876703262329, + "eval_runtime": 6.9838, + "eval_samples_per_second": 516.623, + "eval_steps_per_second": 8.162, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 6.023529788223892e-05, + "loss": 0.7044, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8113421201705933, + "eval_runtime": 6.7747, + "eval_samples_per_second": 532.568, + "eval_steps_per_second": 8.414, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 4.716922215166693e-05, + "loss": 0.6988, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 3.559072347611967e-05, + "loss": 0.6935, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8096914887428284, + "eval_runtime": 6.7593, + "eval_samples_per_second": 533.786, + "eval_steps_per_second": 8.433, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 2.5560703841704238e-05, + "loss": 0.6959, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.713192035739887e-05, + "loss": 0.6917, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8081953525543213, + "eval_runtime": 6.9966, + "eval_samples_per_second": 515.678, + "eval_steps_per_second": 8.147, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.0348707756837439e-05, + "loss": 0.6923, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8098569512367249, + "eval_runtime": 6.7115, + "eval_samples_per_second": 537.585, + "eval_steps_per_second": 8.493, + "step": 1190 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0006599656122277525, + "metric": "eval/loss", + "warmup_ratio": 0.10589245981913464 + } +} diff --git a/run-mum0q0dq/checkpoint-1190/training_args.bin b/run-mum0q0dq/checkpoint-1190/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7479295bce229b636094c8342f00d4b6c93cbdf --- /dev/null +++ b/run-mum0q0dq/checkpoint-1190/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64c061cf669377da46f9f3bd8310b1e5d7da6189a63a9a41b26d4c3993ac78e2 +size 4792 diff --git a/run-mum0q0dq/checkpoint-1260/model.safetensors b/run-mum0q0dq/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6295b7828c2545aa13d5ed25691599f3bc93fe17 --- /dev/null +++ b/run-mum0q0dq/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f9a1611c8a322a8b0a6bce2ac755ec08eaae0b187890bc4763029ad7a4ebd7e +size 198025308 diff --git a/run-mum0q0dq/checkpoint-1260/optimizer.pt b/run-mum0q0dq/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4bf77a5760596e1fa01a5ac50b3bd350be5cc40d --- /dev/null +++ b/run-mum0q0dq/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:767ffa0f93d735d81ef6ccdfd9f7b6953bfbcc4886121bd6d50f09fe9c4198d4 +size 395900602 diff --git a/run-mum0q0dq/checkpoint-1260/rng_state.pth b/run-mum0q0dq/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-mum0q0dq/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-mum0q0dq/checkpoint-1260/scheduler.pt b/run-mum0q0dq/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf1aa2e18dda0a5b533488c14a04c719e9f6a027 --- /dev/null +++ b/run-mum0q0dq/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:735992dab7d89d909f396b86d170a20781ad5b5de434bf58da8948e4678190f4 +size 1064 diff --git a/run-mum0q0dq/checkpoint-1260/trainer_state.json b/run-mum0q0dq/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d6fc505f9702e46cdcf6bc8ec565be70c4f6b607 --- /dev/null +++ b/run-mum0q0dq/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9185144124168514, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-mum0q0dq/checkpoint-1190", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 0.00012805302923822064, + "loss": 1.1577, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8949556541019955, + "eval_loss": 0.8690281510353088, + "eval_runtime": 6.8017, + "eval_samples_per_second": 530.458, + "eval_steps_per_second": 8.38, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002561060584764413, + "loss": 0.8454, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00038415908771466194, + "loss": 0.8194, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8761086474501109, + "eval_loss": 0.8702611327171326, + "eval_runtime": 6.9556, + "eval_samples_per_second": 518.719, + "eval_steps_per_second": 8.195, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005122121169528826, + "loss": 0.8194, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8747228381374723, + "eval_loss": 0.8597623109817505, + "eval_runtime": 6.9396, + "eval_samples_per_second": 519.912, + "eval_steps_per_second": 8.214, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0006402651461911032, + "loss": 0.8266, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0006593441807009602, + "loss": 0.8175, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8187361419068736, + "eval_loss": 0.958842396736145, + "eval_runtime": 6.7912, + "eval_samples_per_second": 531.273, + "eval_steps_per_second": 8.393, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0006570108838555026, + "loss": 0.8266, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0006529574452191814, + "loss": 0.8191, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8508869179600886, + "eval_loss": 0.9678304195404053, + "eval_runtime": 6.8224, + "eval_samples_per_second": 528.846, + "eval_steps_per_second": 8.355, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0006472051855582233, + "loss": 0.8191, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.7907427937915743, + "eval_loss": 1.027077317237854, + "eval_runtime": 6.926, + "eval_samples_per_second": 520.933, + "eval_steps_per_second": 8.23, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.000639784361302911, + "loss": 0.8119, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0006307340054011563, + "loss": 0.8162, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8531042128603105, + "eval_loss": 0.9406482577323914, + "eval_runtime": 6.9509, + "eval_samples_per_second": 519.072, + "eval_steps_per_second": 8.2, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0006201017220082855, + "loss": 0.8053, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0006079434360929515, + "loss": 0.7924, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8957871396895787, + "eval_loss": 0.8433511853218079, + "eval_runtime": 6.8773, + "eval_samples_per_second": 524.624, + "eval_steps_per_second": 8.288, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0005943230992762283, + "loss": 0.7899, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8910753880266076, + "eval_loss": 0.8416498303413391, + "eval_runtime": 6.8792, + "eval_samples_per_second": 524.482, + "eval_steps_per_second": 8.286, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0005793123534511479, + "loss": 0.7945, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0005629901539520169, + "loss": 0.781, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8855321507760532, + "eval_loss": 0.8604955673217773, + "eval_runtime": 6.766, + "eval_samples_per_second": 533.258, + "eval_steps_per_second": 8.425, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0005454423542556109, + "loss": 0.7659, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8952328159645233, + "eval_loss": 0.8337368369102478, + "eval_runtime": 6.6704, + "eval_samples_per_second": 540.896, + "eval_steps_per_second": 8.545, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0005267612543986848, + "loss": 0.7809, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0005070451154870894, + "loss": 0.7615, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.891629711751663, + "eval_loss": 0.8505132794380188, + "eval_runtime": 6.7914, + "eval_samples_per_second": 531.262, + "eval_steps_per_second": 8.393, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0004863976428501364, + "loss": 0.7648, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0004649274405587783, + "loss": 0.757, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8938470066518847, + "eval_loss": 0.8380396962165833, + "eval_runtime": 6.9482, + "eval_samples_per_second": 519.269, + "eval_steps_per_second": 8.204, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.000442747440176796, + "loss": 0.7618, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9024390243902439, + "eval_loss": 0.8248789310455322, + "eval_runtime": 6.7846, + "eval_samples_per_second": 531.789, + "eval_steps_per_second": 8.401, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.000419974306749717, + "loss": 0.7573, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00039672782515591396, + "loss": 0.7408, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8968957871396895, + "eval_loss": 0.8384109735488892, + "eval_runtime": 6.7455, + "eval_samples_per_second": 534.875, + "eval_steps_per_second": 8.45, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0003731302700476317, + "loss": 0.7407, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0003493057626959984, + "loss": 0.7447, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8292194604873657, + "eval_runtime": 6.7571, + "eval_samples_per_second": 533.958, + "eval_steps_per_second": 8.436, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0003253796181229696, + "loss": 0.7396, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8235617876052856, + "eval_runtime": 6.925, + "eval_samples_per_second": 521.015, + "eval_steps_per_second": 8.231, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0003014776859542366, + "loss": 0.7305, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00027772568846016153, + "loss": 0.7254, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9077050997782705, + "eval_loss": 0.8225652575492859, + "eval_runtime": 7.0224, + "eval_samples_per_second": 513.782, + "eval_steps_per_second": 8.117, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00025424855926659137, + "loss": 0.7212, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00023116978621387982, + "loss": 0.7176, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.8167243003845215, + "eval_runtime": 6.9369, + "eval_samples_per_second": 520.118, + "eval_steps_per_second": 8.217, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0002086107618206265, + "loss": 0.7098, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8160545229911804, + "eval_runtime": 6.7153, + "eval_samples_per_second": 537.283, + "eval_steps_per_second": 8.488, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0001866901447686423, + "loss": 0.7113, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00016552323576767973, + "loss": 0.7144, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.816665530204773, + "eval_runtime": 6.7754, + "eval_samples_per_second": 532.511, + "eval_steps_per_second": 8.413, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.0001452213710828307, + "loss": 0.7079, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9118625277161863, + "eval_loss": 0.8123109340667725, + "eval_runtime": 7.011, + "eval_samples_per_second": 514.623, + "eval_steps_per_second": 8.13, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0001258913369145893, + "loss": 0.7071, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.0001076348077118933, + "loss": 0.7043, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.814229428768158, + "eval_runtime": 6.7528, + "eval_samples_per_second": 534.296, + "eval_steps_per_second": 8.441, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 9.054781137257503e-05, + "loss": 0.7025, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 7.472022414422346e-05, + "loss": 0.6979, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8085876703262329, + "eval_runtime": 6.9838, + "eval_samples_per_second": 516.623, + "eval_steps_per_second": 8.162, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 6.023529788223892e-05, + "loss": 0.7044, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8113421201705933, + "eval_runtime": 6.7747, + "eval_samples_per_second": 532.568, + "eval_steps_per_second": 8.414, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 4.716922215166693e-05, + "loss": 0.6988, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 3.559072347611967e-05, + "loss": 0.6935, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8096914887428284, + "eval_runtime": 6.7593, + "eval_samples_per_second": 533.786, + "eval_steps_per_second": 8.433, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 2.5560703841704238e-05, + "loss": 0.6959, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.713192035739887e-05, + "loss": 0.6917, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8081953525543213, + "eval_runtime": 6.9966, + "eval_samples_per_second": 515.678, + "eval_steps_per_second": 8.147, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.0348707756837439e-05, + "loss": 0.6923, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8098569512367249, + "eval_runtime": 6.7115, + "eval_samples_per_second": 537.585, + "eval_steps_per_second": 8.493, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 5.246745201121808e-06, + "loss": 0.6952, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.852868609259507e-06, + "loss": 0.6962, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8084741830825806, + "eval_runtime": 6.8314, + "eval_samples_per_second": 528.15, + "eval_steps_per_second": 8.344, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 1.8492950335546405e-07, + "loss": 0.6913, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8067858219146729, + "eval_runtime": 6.8089, + "eval_samples_per_second": 529.892, + "eval_steps_per_second": 8.371, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0006599656122277525, + "metric": "eval/loss", + "warmup_ratio": 0.10589245981913464 + } +} diff --git a/run-mum0q0dq/checkpoint-1260/training_args.bin b/run-mum0q0dq/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7479295bce229b636094c8342f00d4b6c93cbdf --- /dev/null +++ b/run-mum0q0dq/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64c061cf669377da46f9f3bd8310b1e5d7da6189a63a9a41b26d4c3993ac78e2 +size 4792 diff --git a/run-n0aae3kt/checkpoint-1232/model.safetensors b/run-n0aae3kt/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61c2a1f18174cf3f20b65619bc3f89041bc7bd66 --- /dev/null +++ b/run-n0aae3kt/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e903d15605019cdf80d333e2811c344c7e9e19163b6c3917e406a8e43a8060b +size 198025308 diff --git a/run-n0aae3kt/checkpoint-1232/optimizer.pt b/run-n0aae3kt/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a2c9680eebf25047a0c9d56227d73173a453929b --- /dev/null +++ b/run-n0aae3kt/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:119a9e34cbbcca45f9adbdf6562811c6fbfa705b710f04e40b8469ed2af53dbf +size 395900602 diff --git a/run-n0aae3kt/checkpoint-1232/rng_state.pth b/run-n0aae3kt/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-n0aae3kt/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-n0aae3kt/checkpoint-1232/scheduler.pt b/run-n0aae3kt/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2507d34d7ac5ca3df7482e1ddb5f81940114c20 --- /dev/null +++ b/run-n0aae3kt/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:383ccc2f9ee1f376fc10bbbb38f26ef7e1078705f8c7c08bbc98d9aa5c4abf65 +size 1064 diff --git a/run-n0aae3kt/checkpoint-1232/trainer_state.json b/run-n0aae3kt/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0805de094eabb2c1c2d9963fec7f30c579b3931b --- /dev/null +++ b/run-n0aae3kt/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.8799889135254989, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-n0aae3kt/checkpoint-1232", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.074487068480997e-07, + "loss": 1.5373, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.39440133037694014, + "eval_loss": 1.5268347263336182, + "eval_runtime": 6.592, + "eval_samples_per_second": 547.329, + "eval_steps_per_second": 8.647, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 2.148974136961994e-07, + "loss": 1.5306, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 3.2234612054429906e-07, + "loss": 1.5164, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.48281596452328157, + "eval_loss": 1.5014829635620117, + "eval_runtime": 6.7405, + "eval_samples_per_second": 535.27, + "eval_steps_per_second": 8.456, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 4.297948273923988e-07, + "loss": 1.4966, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.6252771618625277, + "eval_loss": 1.4559929370880127, + "eval_runtime": 6.9297, + "eval_samples_per_second": 520.656, + "eval_steps_per_second": 8.225, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 5.372435342404985e-07, + "loss": 1.4741, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 6.446922410885981e-07, + "loss": 1.4429, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7358647450110865, + "eval_loss": 1.4015789031982422, + "eval_runtime": 7.0731, + "eval_samples_per_second": 510.101, + "eval_steps_per_second": 8.059, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 7.521409479366978e-07, + "loss": 1.4022, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 8.595896547847976e-07, + "loss": 1.3655, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8034922394678492, + "eval_loss": 1.3299400806427002, + "eval_runtime": 6.4254, + "eval_samples_per_second": 561.524, + "eval_steps_per_second": 8.871, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 9.670383616328972e-07, + "loss": 1.3202, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8217849223946785, + "eval_loss": 1.2595144510269165, + "eval_runtime": 6.8781, + "eval_samples_per_second": 524.562, + "eval_steps_per_second": 8.287, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 1.074487068480997e-06, + "loss": 1.2643, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 1.1819357753290966e-06, + "loss": 1.2147, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8281596452328159, + "eval_loss": 1.156946063041687, + "eval_runtime": 6.8458, + "eval_samples_per_second": 527.039, + "eval_steps_per_second": 8.326, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 1.2893844821771963e-06, + "loss": 1.1588, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 1.396833189025296e-06, + "loss": 1.0952, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8284368070953437, + "eval_loss": 1.0611629486083984, + "eval_runtime": 6.785, + "eval_samples_per_second": 531.763, + "eval_steps_per_second": 8.401, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 1.4793245212128266e-06, + "loss": 1.0404, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8284368070953437, + "eval_loss": 0.9948024749755859, + "eval_runtime": 6.7195, + "eval_samples_per_second": 536.941, + "eval_steps_per_second": 8.483, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 1.474896302407762e-06, + "loss": 1.0035, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 1.464443674372238e-06, + "loss": 0.9754, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8284368070953437, + "eval_loss": 0.9592161774635315, + "eval_runtime": 6.9249, + "eval_samples_per_second": 521.022, + "eval_steps_per_second": 8.231, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 1.4480522939486382e-06, + "loss": 0.9643, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8298226164079823, + "eval_loss": 0.9397813081741333, + "eval_runtime": 6.6766, + "eval_samples_per_second": 540.391, + "eval_steps_per_second": 8.537, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 1.425856484666377e-06, + "loss": 0.9446, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 1.398038135991989e-06, + "loss": 0.9357, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8361973392461197, + "eval_loss": 0.9241200089454651, + "eval_runtime": 6.757, + "eval_samples_per_second": 533.963, + "eval_steps_per_second": 8.436, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 1.3648252127874855e-06, + "loss": 0.9254, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 1.3264898871916198e-06, + "loss": 0.9182, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8453436807095344, + "eval_loss": 0.9107545018196106, + "eval_runtime": 7.0545, + "eval_samples_per_second": 511.446, + "eval_steps_per_second": 8.08, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 1.2833463082328588e-06, + "loss": 0.9065, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8508869179600886, + "eval_loss": 0.9055072665214539, + "eval_runtime": 6.8468, + "eval_samples_per_second": 526.958, + "eval_steps_per_second": 8.325, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 1.2357480274515587e-06, + "loss": 0.9144, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 1.1840851016277707e-06, + "loss": 0.8969, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8580931263858093, + "eval_loss": 0.899703860282898, + "eval_runtime": 6.8439, + "eval_samples_per_second": 527.185, + "eval_steps_per_second": 8.329, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 1.1287808963571439e-06, + "loss": 0.9068, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 1.0702886166688741e-06, + "loss": 0.8891, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8603104212860311, + "eval_loss": 0.8915008902549744, + "eval_runtime": 6.7211, + "eval_samples_per_second": 536.813, + "eval_steps_per_second": 8.481, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 1.0090875931164721e-06, + "loss": 0.8967, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.8872919082641602, + "eval_runtime": 6.8061, + "eval_samples_per_second": 530.114, + "eval_steps_per_second": 8.375, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 9.456793537759696e-07, + "loss": 0.8884, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 8.80583514340621e-07, + "loss": 0.8868, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.8700110864745011, + "eval_loss": 0.8794757127761841, + "eval_runtime": 6.8385, + "eval_samples_per_second": 527.599, + "eval_steps_per_second": 8.335, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 8.143335199918087e-07, + "loss": 0.8835, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 7.474722739405298e-07, + "loss": 0.8772, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.8666851441241685, + "eval_loss": 0.8807284832000732, + "eval_runtime": 6.6979, + "eval_samples_per_second": 538.68, + "eval_steps_per_second": 8.51, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 6.805476884625407e-07, + "loss": 0.8903, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8702882483370288, + "eval_loss": 0.8785661458969116, + "eval_runtime": 6.9842, + "eval_samples_per_second": 516.591, + "eval_steps_per_second": 8.161, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 6.141081948853876e-07, + "loss": 0.8714, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 5.48698249321929e-07, + "loss": 0.872, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.875, + "eval_loss": 0.872904896736145, + "eval_runtime": 6.9338, + "eval_samples_per_second": 520.35, + "eval_steps_per_second": 8.221, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 4.84853870979813e-07, + "loss": 0.8816, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.8763858093126385, + "eval_loss": 0.8746911883354187, + "eval_runtime": 6.7222, + "eval_samples_per_second": 536.726, + "eval_steps_per_second": 8.479, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 4.230982496094197e-07, + "loss": 0.8713, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 3.6393745808620604e-07, + "loss": 0.87, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.8755543237250555, + "eval_loss": 0.8719679713249207, + "eval_runtime": 6.9531, + "eval_samples_per_second": 518.908, + "eval_steps_per_second": 8.198, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 3.0785630526183747e-07, + "loss": 0.8759, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 2.553143630690216e-07, + "loss": 0.8736, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.8761086474501109, + "eval_loss": 0.870969831943512, + "eval_runtime": 6.726, + "eval_samples_per_second": 536.429, + "eval_steps_per_second": 8.475, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 2.0674220043699218e-07, + "loss": 0.8758, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.8741685144124168, + "eval_loss": 0.8720474243164062, + "eval_runtime": 6.5852, + "eval_samples_per_second": 547.897, + "eval_steps_per_second": 8.656, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.625378548798228e-07, + "loss": 0.8692, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 1.2306357067208147e-07, + "loss": 0.8712, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.8797117516629712, + "eval_loss": 0.869019091129303, + "eval_runtime": 6.5783, + "eval_samples_per_second": 548.466, + "eval_steps_per_second": 8.665, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 8.864283034171151e-08, + "loss": 0.8743, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 5.955770380636261e-08, + "loss": 0.8737, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.8783259423503326, + "eval_loss": 0.8727265000343323, + "eval_runtime": 6.6905, + "eval_samples_per_second": 539.275, + "eval_steps_per_second": 8.52, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 3.6046536876377645e-08, + "loss": 0.8774, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.8783259423503326, + "eval_loss": 0.8700127005577087, + "eval_runtime": 6.881, + "eval_samples_per_second": 524.344, + "eval_steps_per_second": 8.284, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.8301998066615446e-08, + "loss": 0.8687, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 6.469499723026765e-09, + "loss": 0.8619, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.8799889135254989, + "eval_loss": 0.8671879172325134, + "eval_runtime": 6.8708, + "eval_samples_per_second": 525.122, + "eval_steps_per_second": 8.296, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 1.479486040446911e-06, + "metric": "eval/loss", + "warmup_ratio": 0.28406347610849936 + } +} diff --git a/run-n0aae3kt/checkpoint-1232/training_args.bin b/run-n0aae3kt/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d179d3e8689f2d2f85e32fb26879a8a093a653b3 --- /dev/null +++ b/run-n0aae3kt/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5f1b9d70e200e45e1e06fe0b18cb472fef3f10040e11ad2e02f3ecc109cda8c +size 4792 diff --git a/run-n0aae3kt/checkpoint-1260/model.safetensors b/run-n0aae3kt/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..503c51a8aa5903536c438679b0f206aa900480c2 --- /dev/null +++ b/run-n0aae3kt/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf732116bd5e53a9ecdf87d391094bedf2951843522835f13083e48725029f4b +size 198025308 diff --git a/run-n0aae3kt/checkpoint-1260/optimizer.pt b/run-n0aae3kt/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..426bd1ada4d6887baeef720c6886ec37857f917c --- /dev/null +++ b/run-n0aae3kt/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05c1704e9280d34c31100f1d451026334a6bb441c3deae3cbd031af2b0721412 +size 395900602 diff --git a/run-n0aae3kt/checkpoint-1260/rng_state.pth b/run-n0aae3kt/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-n0aae3kt/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-n0aae3kt/checkpoint-1260/scheduler.pt b/run-n0aae3kt/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4cc598497be14c08591c18368356a91a3a9a3ed8 --- /dev/null +++ b/run-n0aae3kt/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c25e4e06d5fee0b1159a047d810fffd30d3cb82066eecaddbc6eaeb1b64a3d6 +size 1064 diff --git a/run-n0aae3kt/checkpoint-1260/trainer_state.json b/run-n0aae3kt/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4afa5449db031449ccfd86dd575db0f6e1c1f3ca --- /dev/null +++ b/run-n0aae3kt/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.8819290465631929, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-n0aae3kt/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.074487068480997e-07, + "loss": 1.5373, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.39440133037694014, + "eval_loss": 1.5268347263336182, + "eval_runtime": 6.592, + "eval_samples_per_second": 547.329, + "eval_steps_per_second": 8.647, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 2.148974136961994e-07, + "loss": 1.5306, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 3.2234612054429906e-07, + "loss": 1.5164, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.48281596452328157, + "eval_loss": 1.5014829635620117, + "eval_runtime": 6.7405, + "eval_samples_per_second": 535.27, + "eval_steps_per_second": 8.456, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 4.297948273923988e-07, + "loss": 1.4966, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.6252771618625277, + "eval_loss": 1.4559929370880127, + "eval_runtime": 6.9297, + "eval_samples_per_second": 520.656, + "eval_steps_per_second": 8.225, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 5.372435342404985e-07, + "loss": 1.4741, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 6.446922410885981e-07, + "loss": 1.4429, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7358647450110865, + "eval_loss": 1.4015789031982422, + "eval_runtime": 7.0731, + "eval_samples_per_second": 510.101, + "eval_steps_per_second": 8.059, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 7.521409479366978e-07, + "loss": 1.4022, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 8.595896547847976e-07, + "loss": 1.3655, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8034922394678492, + "eval_loss": 1.3299400806427002, + "eval_runtime": 6.4254, + "eval_samples_per_second": 561.524, + "eval_steps_per_second": 8.871, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 9.670383616328972e-07, + "loss": 1.3202, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8217849223946785, + "eval_loss": 1.2595144510269165, + "eval_runtime": 6.8781, + "eval_samples_per_second": 524.562, + "eval_steps_per_second": 8.287, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 1.074487068480997e-06, + "loss": 1.2643, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 1.1819357753290966e-06, + "loss": 1.2147, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8281596452328159, + "eval_loss": 1.156946063041687, + "eval_runtime": 6.8458, + "eval_samples_per_second": 527.039, + "eval_steps_per_second": 8.326, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 1.2893844821771963e-06, + "loss": 1.1588, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 1.396833189025296e-06, + "loss": 1.0952, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8284368070953437, + "eval_loss": 1.0611629486083984, + "eval_runtime": 6.785, + "eval_samples_per_second": 531.763, + "eval_steps_per_second": 8.401, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 1.4793245212128266e-06, + "loss": 1.0404, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8284368070953437, + "eval_loss": 0.9948024749755859, + "eval_runtime": 6.7195, + "eval_samples_per_second": 536.941, + "eval_steps_per_second": 8.483, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 1.474896302407762e-06, + "loss": 1.0035, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 1.464443674372238e-06, + "loss": 0.9754, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8284368070953437, + "eval_loss": 0.9592161774635315, + "eval_runtime": 6.9249, + "eval_samples_per_second": 521.022, + "eval_steps_per_second": 8.231, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 1.4480522939486382e-06, + "loss": 0.9643, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8298226164079823, + "eval_loss": 0.9397813081741333, + "eval_runtime": 6.6766, + "eval_samples_per_second": 540.391, + "eval_steps_per_second": 8.537, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 1.425856484666377e-06, + "loss": 0.9446, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 1.398038135991989e-06, + "loss": 0.9357, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8361973392461197, + "eval_loss": 0.9241200089454651, + "eval_runtime": 6.757, + "eval_samples_per_second": 533.963, + "eval_steps_per_second": 8.436, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 1.3648252127874855e-06, + "loss": 0.9254, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 1.3264898871916198e-06, + "loss": 0.9182, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8453436807095344, + "eval_loss": 0.9107545018196106, + "eval_runtime": 7.0545, + "eval_samples_per_second": 511.446, + "eval_steps_per_second": 8.08, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 1.2833463082328588e-06, + "loss": 0.9065, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8508869179600886, + "eval_loss": 0.9055072665214539, + "eval_runtime": 6.8468, + "eval_samples_per_second": 526.958, + "eval_steps_per_second": 8.325, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 1.2357480274515587e-06, + "loss": 0.9144, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 1.1840851016277707e-06, + "loss": 0.8969, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8580931263858093, + "eval_loss": 0.899703860282898, + "eval_runtime": 6.8439, + "eval_samples_per_second": 527.185, + "eval_steps_per_second": 8.329, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 1.1287808963571439e-06, + "loss": 0.9068, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 1.0702886166688741e-06, + "loss": 0.8891, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8603104212860311, + "eval_loss": 0.8915008902549744, + "eval_runtime": 6.7211, + "eval_samples_per_second": 536.813, + "eval_steps_per_second": 8.481, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 1.0090875931164721e-06, + "loss": 0.8967, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8636363636363636, + "eval_loss": 0.8872919082641602, + "eval_runtime": 6.8061, + "eval_samples_per_second": 530.114, + "eval_steps_per_second": 8.375, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 9.456793537759696e-07, + "loss": 0.8884, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 8.80583514340621e-07, + "loss": 0.8868, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.8700110864745011, + "eval_loss": 0.8794757127761841, + "eval_runtime": 6.8385, + "eval_samples_per_second": 527.599, + "eval_steps_per_second": 8.335, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 8.143335199918087e-07, + "loss": 0.8835, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 7.474722739405298e-07, + "loss": 0.8772, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.8666851441241685, + "eval_loss": 0.8807284832000732, + "eval_runtime": 6.6979, + "eval_samples_per_second": 538.68, + "eval_steps_per_second": 8.51, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 6.805476884625407e-07, + "loss": 0.8903, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8702882483370288, + "eval_loss": 0.8785661458969116, + "eval_runtime": 6.9842, + "eval_samples_per_second": 516.591, + "eval_steps_per_second": 8.161, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 6.141081948853876e-07, + "loss": 0.8714, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 5.48698249321929e-07, + "loss": 0.872, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.875, + "eval_loss": 0.872904896736145, + "eval_runtime": 6.9338, + "eval_samples_per_second": 520.35, + "eval_steps_per_second": 8.221, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 4.84853870979813e-07, + "loss": 0.8816, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.8763858093126385, + "eval_loss": 0.8746911883354187, + "eval_runtime": 6.7222, + "eval_samples_per_second": 536.726, + "eval_steps_per_second": 8.479, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 4.230982496094197e-07, + "loss": 0.8713, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 3.6393745808620604e-07, + "loss": 0.87, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.8755543237250555, + "eval_loss": 0.8719679713249207, + "eval_runtime": 6.9531, + "eval_samples_per_second": 518.908, + "eval_steps_per_second": 8.198, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 3.0785630526183747e-07, + "loss": 0.8759, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 2.553143630690216e-07, + "loss": 0.8736, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.8761086474501109, + "eval_loss": 0.870969831943512, + "eval_runtime": 6.726, + "eval_samples_per_second": 536.429, + "eval_steps_per_second": 8.475, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 2.0674220043699218e-07, + "loss": 0.8758, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.8741685144124168, + "eval_loss": 0.8720474243164062, + "eval_runtime": 6.5852, + "eval_samples_per_second": 547.897, + "eval_steps_per_second": 8.656, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.625378548798228e-07, + "loss": 0.8692, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 1.2306357067208147e-07, + "loss": 0.8712, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.8797117516629712, + "eval_loss": 0.869019091129303, + "eval_runtime": 6.5783, + "eval_samples_per_second": 548.466, + "eval_steps_per_second": 8.665, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 8.864283034171151e-08, + "loss": 0.8743, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 5.955770380636261e-08, + "loss": 0.8737, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.8783259423503326, + "eval_loss": 0.8727265000343323, + "eval_runtime": 6.6905, + "eval_samples_per_second": 539.275, + "eval_steps_per_second": 8.52, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 3.6046536876377645e-08, + "loss": 0.8774, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.8783259423503326, + "eval_loss": 0.8700127005577087, + "eval_runtime": 6.881, + "eval_samples_per_second": 524.344, + "eval_steps_per_second": 8.284, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.8301998066615446e-08, + "loss": 0.8687, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 6.469499723026765e-09, + "loss": 0.8619, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.8799889135254989, + "eval_loss": 0.8671879172325134, + "eval_runtime": 6.8708, + "eval_samples_per_second": 525.122, + "eval_steps_per_second": 8.296, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 6.460064024841031e-10, + "loss": 0.8734, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.8819290465631929, + "eval_loss": 0.8650510907173157, + "eval_runtime": 6.8535, + "eval_samples_per_second": 526.444, + "eval_steps_per_second": 8.317, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 1.479486040446911e-06, + "metric": "eval/loss", + "warmup_ratio": 0.28406347610849936 + } +} diff --git a/run-n0aae3kt/checkpoint-1260/training_args.bin b/run-n0aae3kt/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d179d3e8689f2d2f85e32fb26879a8a093a653b3 --- /dev/null +++ b/run-n0aae3kt/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5f1b9d70e200e45e1e06fe0b18cb472fef3f10040e11ad2e02f3ecc109cda8c +size 4792 diff --git a/run-n97r4jfo/checkpoint-552/model.safetensors b/run-n97r4jfo/checkpoint-552/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2585926b9b29b529b087367fb5b7c6bf5294c08d --- /dev/null +++ b/run-n97r4jfo/checkpoint-552/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97bd2791b9af9acef4c495b08bf10d5f80738098b37a5b02ce347ce5c123abfe +size 198025308 diff --git a/run-n97r4jfo/checkpoint-552/optimizer.pt b/run-n97r4jfo/checkpoint-552/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..489e6f114f882422cd5ecaebefdb4a094b3c1fb6 --- /dev/null +++ b/run-n97r4jfo/checkpoint-552/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1841e5f5009508cb1e644a6fb44e0eb32a367277d5874b2e88972a7afc77ba6d +size 395900602 diff --git a/run-n97r4jfo/checkpoint-552/rng_state.pth b/run-n97r4jfo/checkpoint-552/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a07d02214d4d2a0bd650d84451df8b01ad9e2e1f --- /dev/null +++ b/run-n97r4jfo/checkpoint-552/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ea0e7f2a9ffdc1f2f52e0b770bd1a5190fc0a00c767b73b57597c52b6f4dee6 +size 14244 diff --git a/run-n97r4jfo/checkpoint-552/scheduler.pt b/run-n97r4jfo/checkpoint-552/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d3576a6dd3d6a49b2a163b275226206224b9fc7 --- /dev/null +++ b/run-n97r4jfo/checkpoint-552/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f4b44bc1e8767ffd4cf7762c7accec55314119ca42005ccec102a1350074fd3 +size 1064 diff --git a/run-n97r4jfo/checkpoint-552/trainer_state.json b/run-n97r4jfo/checkpoint-552/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7a41a57421541f0210af6f58f624f6de7bb4f2d3 --- /dev/null +++ b/run-n97r4jfo/checkpoint-552/trainer_state.json @@ -0,0 +1,592 @@ +{ + "best_metric": 0.918341863867805, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-n97r4jfo/checkpoint-552", + "epoch": 25.976470588235294, + "eval_steps": 500, + "global_step": 552, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.81949060914269e-05, + "loss": 1.4623, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7508179552480237, + "eval_loss": 1.0061482191085815, + "eval_precision": 0.6864978143631778, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.1392, + "eval_samples_per_second": 443.287, + "eval_steps_per_second": 3.563, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.63898121828538e-05, + "loss": 1.0984, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 5.458471827428071e-05, + "loss": 0.9291, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8738913525498891, + "eval_f1": 0.8499331077573012, + "eval_loss": 0.9094669818878174, + "eval_precision": 0.8727221142880844, + "eval_recall": 0.8738913525498891, + "eval_runtime": 8.2277, + "eval_samples_per_second": 438.518, + "eval_steps_per_second": 3.525, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 7.27796243657076e-05, + "loss": 0.8542, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.8992157822605333, + "eval_loss": 0.8313647508621216, + "eval_precision": 0.8955548426081152, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.1923, + "eval_samples_per_second": 440.415, + "eval_steps_per_second": 3.54, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 9.097453045713452e-05, + "loss": 0.8253, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00010916943654856142, + "loss": 0.7978, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9032705099778271, + "eval_f1": 0.9011132153452561, + "eval_loss": 0.8353525996208191, + "eval_precision": 0.9014574859355613, + "eval_recall": 0.9032705099778271, + "eval_runtime": 8.2603, + "eval_samples_per_second": 436.79, + "eval_steps_per_second": 3.511, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0001273643426399883, + "loss": 0.7931, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001455592487314152, + "loss": 0.7797, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.8939988504024552, + "eval_loss": 0.8228206634521484, + "eval_precision": 0.8930120373812349, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.6421, + "eval_samples_per_second": 472.119, + "eval_steps_per_second": 3.795, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00016375415482284213, + "loss": 0.763, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.911286090626249, + "eval_loss": 0.8086482286453247, + "eval_precision": 0.9115960618723593, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.3317, + "eval_samples_per_second": 433.042, + "eval_steps_per_second": 3.481, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00017632322179603609, + "loss": 0.7642, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00017585603624462143, + "loss": 0.7544, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9050351877070053, + "eval_loss": 0.8221792578697205, + "eval_precision": 0.904818406412009, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.2304, + "eval_samples_per_second": 438.377, + "eval_steps_per_second": 3.524, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00017481342157161077, + "loss": 0.7543, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001732022202257319, + "loss": 0.7456, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8021064301552107, + "eval_f1": 0.8261755986214473, + "eval_loss": 0.9661126732826233, + "eval_precision": 0.8809866218237701, + "eval_recall": 0.8021064301552107, + "eval_runtime": 8.4607, + "eval_samples_per_second": 426.442, + "eval_steps_per_second": 3.428, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00017103300616386926, + "loss": 0.7434, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9016495476106522, + "eval_loss": 0.8209700584411621, + "eval_precision": 0.9093174592927158, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.6635, + "eval_samples_per_second": 470.801, + "eval_steps_per_second": 3.784, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00016832001545653117, + "loss": 0.7386, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00016508105285967847, + "loss": 0.7347, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9082594235033259, + "eval_f1": 0.9028994348058978, + "eval_loss": 0.8126837611198425, + "eval_precision": 0.9054925719295098, + "eval_recall": 0.9082594235033259, + "eval_runtime": 8.2069, + "eval_samples_per_second": 439.633, + "eval_steps_per_second": 3.534, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00016133737496606208, + "loss": 0.7275, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.878880266075388, + "eval_f1": 0.8847323333252654, + "eval_loss": 0.8646235466003418, + "eval_precision": 0.8986727581938962, + "eval_recall": 0.878880266075388, + "eval_runtime": 8.3133, + "eval_samples_per_second": 434.004, + "eval_steps_per_second": 3.488, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00015711355070292219, + "loss": 0.7235, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00015243730009157068, + "loss": 0.7192, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9150336434017652, + "eval_loss": 0.8054856657981873, + "eval_precision": 0.91417741570756, + "eval_recall": 0.91990022172949, + "eval_runtime": 8.0579, + "eval_samples_per_second": 447.761, + "eval_steps_per_second": 3.599, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00014733931232704297, + "loss": 0.722, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0001418530443717212, + "loss": 0.7133, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.9088522465778958, + "eval_loss": 0.8185000419616699, + "eval_precision": 0.9117042100509108, + "eval_recall": 0.9093680709534369, + "eval_runtime": 8.2052, + "eval_samples_per_second": 439.724, + "eval_steps_per_second": 3.534, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00013601450138471475, + "loss": 0.716, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9111569702121578, + "eval_loss": 0.8095471858978271, + "eval_precision": 0.9109241638956856, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.9041, + "eval_samples_per_second": 456.47, + "eval_steps_per_second": 3.669, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00012986200042799112, + "loss": 0.7092, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00012343591900000131, + "loss": 0.7072, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8966186252771619, + "eval_f1": 0.8980791498500026, + "eval_loss": 0.828935444355011, + "eval_precision": 0.9041740439180168, + "eval_recall": 0.8966186252771619, + "eval_runtime": 8.369, + "eval_samples_per_second": 431.113, + "eval_steps_per_second": 3.465, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00011677843004711777, + "loss": 0.7099, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00010993322519194619, + "loss": 0.7104, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9097177141784047, + "eval_loss": 0.8031111359596252, + "eval_precision": 0.9096879473628462, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.8678, + "eval_samples_per_second": 458.577, + "eval_steps_per_second": 3.686, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00010294522799490218, + "loss": 0.7046, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9074279379157428, + "eval_f1": 0.9084074003366833, + "eval_loss": 0.8221327066421509, + "eval_precision": 0.9111762520225571, + "eval_recall": 0.9074279379157428, + "eval_runtime": 8.1433, + "eval_samples_per_second": 443.064, + "eval_steps_per_second": 3.561, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 9.586029913085337e-05, + "loss": 0.7043, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 8.872493541568735e-05, + "loss": 0.7003, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9144084295452984, + "eval_loss": 0.8041298389434814, + "eval_precision": 0.9128235296009194, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.994, + "eval_samples_per_second": 451.336, + "eval_steps_per_second": 3.628, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 8.158596465802749e-05, + "loss": 0.7019, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 7.44902383387165e-05, + "loss": 0.6957, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9073821660777006, + "eval_loss": 0.8143490552902222, + "eval_precision": 0.9078088718908383, + "eval_recall": 0.9115853658536586, + "eval_runtime": 8.2268, + "eval_samples_per_second": 438.567, + "eval_steps_per_second": 3.525, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 6.748432413494491e-05, + "loss": 0.7026, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9084677672852983, + "eval_loss": 0.8075430393218994, + "eval_precision": 0.9063369561026408, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.9842, + "eval_samples_per_second": 451.892, + "eval_steps_per_second": 3.632, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 6.061420030691932e-05, + "loss": 0.6961, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 5.392495395274434e-05, + "loss": 0.6917, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9125577382130095, + "eval_loss": 0.8113223314285278, + "eval_precision": 0.9109149564492655, + "eval_recall": 0.9171286031042128, + "eval_runtime": 8.1206, + "eval_samples_per_second": 444.302, + "eval_steps_per_second": 3.571, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 4.746048511180295e-05, + "loss": 0.6928, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9110430690962019, + "eval_loss": 0.8087641596794128, + "eval_precision": 0.9096169177781352, + "eval_recall": 0.9171286031042128, + "eval_runtime": 8.1405, + "eval_samples_per_second": 443.217, + "eval_steps_per_second": 3.562, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 4.1263218658540053e-05, + "loss": 0.6938, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 3.537382587742633e-05, + "loss": 0.6901, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9082344941955525, + "eval_loss": 0.8160849809646606, + "eval_precision": 0.9056215442680617, + "eval_recall": 0.914079822616408, + "eval_runtime": 8.0781, + "eval_samples_per_second": 446.638, + "eval_steps_per_second": 3.59, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.9830957546343734e-05, + "loss": 0.6916, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 2.4670990280107284e-05, + "loss": 0.6928, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9133354160066935, + "eval_loss": 0.803188681602478, + "eval_precision": 0.9120207234461367, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.0935, + "eval_samples_per_second": 445.792, + "eval_steps_per_second": 3.583, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.9927787798814757e-05, + "loss": 0.6919, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9218403547671841, + "eval_f1": 0.9160442687817897, + "eval_loss": 0.8060706257820129, + "eval_precision": 0.915626398388983, + "eval_recall": 0.9218403547671841, + "eval_runtime": 7.9183, + "eval_samples_per_second": 455.651, + "eval_steps_per_second": 3.662, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.5632478687766358e-05, + "loss": 0.6894, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.1813252107467344e-05, + "loss": 0.689, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9226718403547672, + "eval_f1": 0.918341863867805, + "eval_loss": 0.7993049025535583, + "eval_precision": 0.9176384019616962, + "eval_recall": 0.9226718403547672, + "eval_runtime": 8.0485, + "eval_samples_per_second": 448.281, + "eval_steps_per_second": 3.603, + "step": 552 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.04884842792144779, + "learning_rate": 0.00017635062827075305, + "metric": "eval/loss", + "weight_decay": 0.07457026044587774 + } +} diff --git a/run-n97r4jfo/checkpoint-552/training_args.bin b/run-n97r4jfo/checkpoint-552/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5db540fbd2e53bf10b3e0bc33e6fcbc1ac928d4d --- /dev/null +++ b/run-n97r4jfo/checkpoint-552/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:946e922d5b635f3cc95ffb367e50486265cf9dddd2033baeccacdd3a246cce27 +size 4792 diff --git a/run-n97r4jfo/checkpoint-630/model.safetensors b/run-n97r4jfo/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e2d0211016b5e02a75993dea5560731d95abe69 --- /dev/null +++ b/run-n97r4jfo/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5bd98035ff974f1d77a4d5e4433d3565c7ab4c80c5508a797fdbe1f797386e1 +size 198025308 diff --git a/run-n97r4jfo/checkpoint-630/optimizer.pt b/run-n97r4jfo/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8321a1c12d3077d5851e950a5c4c5fd5e0196ac9 --- /dev/null +++ b/run-n97r4jfo/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bca6c1eb1695a678206e2547065957b4eea96712bf85540a8e0c14ebe84d55cc +size 395900602 diff --git a/run-n97r4jfo/checkpoint-630/rng_state.pth b/run-n97r4jfo/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-n97r4jfo/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-n97r4jfo/checkpoint-630/scheduler.pt b/run-n97r4jfo/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d2eb7d175077a1b2640c468f4b2dc0d8131aa28 --- /dev/null +++ b/run-n97r4jfo/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7f74849b5e679269ce75bd6b293e4607358cde161eaa715cbc81b38caf78f62 +size 1064 diff --git a/run-n97r4jfo/checkpoint-630/trainer_state.json b/run-n97r4jfo/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d8bc2a516d7a8f6bfb0e91989d61c372ac851ccd --- /dev/null +++ b/run-n97r4jfo/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.918341863867805, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-n97r4jfo/checkpoint-552", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.81949060914269e-05, + "loss": 1.4623, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7508179552480237, + "eval_loss": 1.0061482191085815, + "eval_precision": 0.6864978143631778, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.1392, + "eval_samples_per_second": 443.287, + "eval_steps_per_second": 3.563, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.63898121828538e-05, + "loss": 1.0984, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 5.458471827428071e-05, + "loss": 0.9291, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8738913525498891, + "eval_f1": 0.8499331077573012, + "eval_loss": 0.9094669818878174, + "eval_precision": 0.8727221142880844, + "eval_recall": 0.8738913525498891, + "eval_runtime": 8.2277, + "eval_samples_per_second": 438.518, + "eval_steps_per_second": 3.525, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 7.27796243657076e-05, + "loss": 0.8542, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.8992157822605333, + "eval_loss": 0.8313647508621216, + "eval_precision": 0.8955548426081152, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.1923, + "eval_samples_per_second": 440.415, + "eval_steps_per_second": 3.54, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 9.097453045713452e-05, + "loss": 0.8253, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00010916943654856142, + "loss": 0.7978, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9032705099778271, + "eval_f1": 0.9011132153452561, + "eval_loss": 0.8353525996208191, + "eval_precision": 0.9014574859355613, + "eval_recall": 0.9032705099778271, + "eval_runtime": 8.2603, + "eval_samples_per_second": 436.79, + "eval_steps_per_second": 3.511, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0001273643426399883, + "loss": 0.7931, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001455592487314152, + "loss": 0.7797, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.8939988504024552, + "eval_loss": 0.8228206634521484, + "eval_precision": 0.8930120373812349, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.6421, + "eval_samples_per_second": 472.119, + "eval_steps_per_second": 3.795, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00016375415482284213, + "loss": 0.763, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.911286090626249, + "eval_loss": 0.8086482286453247, + "eval_precision": 0.9115960618723593, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.3317, + "eval_samples_per_second": 433.042, + "eval_steps_per_second": 3.481, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00017632322179603609, + "loss": 0.7642, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00017585603624462143, + "loss": 0.7544, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9050351877070053, + "eval_loss": 0.8221792578697205, + "eval_precision": 0.904818406412009, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.2304, + "eval_samples_per_second": 438.377, + "eval_steps_per_second": 3.524, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00017481342157161077, + "loss": 0.7543, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001732022202257319, + "loss": 0.7456, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8021064301552107, + "eval_f1": 0.8261755986214473, + "eval_loss": 0.9661126732826233, + "eval_precision": 0.8809866218237701, + "eval_recall": 0.8021064301552107, + "eval_runtime": 8.4607, + "eval_samples_per_second": 426.442, + "eval_steps_per_second": 3.428, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00017103300616386926, + "loss": 0.7434, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9016495476106522, + "eval_loss": 0.8209700584411621, + "eval_precision": 0.9093174592927158, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.6635, + "eval_samples_per_second": 470.801, + "eval_steps_per_second": 3.784, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00016832001545653117, + "loss": 0.7386, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00016508105285967847, + "loss": 0.7347, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9082594235033259, + "eval_f1": 0.9028994348058978, + "eval_loss": 0.8126837611198425, + "eval_precision": 0.9054925719295098, + "eval_recall": 0.9082594235033259, + "eval_runtime": 8.2069, + "eval_samples_per_second": 439.633, + "eval_steps_per_second": 3.534, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00016133737496606208, + "loss": 0.7275, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.878880266075388, + "eval_f1": 0.8847323333252654, + "eval_loss": 0.8646235466003418, + "eval_precision": 0.8986727581938962, + "eval_recall": 0.878880266075388, + "eval_runtime": 8.3133, + "eval_samples_per_second": 434.004, + "eval_steps_per_second": 3.488, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00015711355070292219, + "loss": 0.7235, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00015243730009157068, + "loss": 0.7192, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9150336434017652, + "eval_loss": 0.8054856657981873, + "eval_precision": 0.91417741570756, + "eval_recall": 0.91990022172949, + "eval_runtime": 8.0579, + "eval_samples_per_second": 447.761, + "eval_steps_per_second": 3.599, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00014733931232704297, + "loss": 0.722, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0001418530443717212, + "loss": 0.7133, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.9088522465778958, + "eval_loss": 0.8185000419616699, + "eval_precision": 0.9117042100509108, + "eval_recall": 0.9093680709534369, + "eval_runtime": 8.2052, + "eval_samples_per_second": 439.724, + "eval_steps_per_second": 3.534, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00013601450138471475, + "loss": 0.716, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9111569702121578, + "eval_loss": 0.8095471858978271, + "eval_precision": 0.9109241638956856, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.9041, + "eval_samples_per_second": 456.47, + "eval_steps_per_second": 3.669, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00012986200042799112, + "loss": 0.7092, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00012343591900000131, + "loss": 0.7072, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8966186252771619, + "eval_f1": 0.8980791498500026, + "eval_loss": 0.828935444355011, + "eval_precision": 0.9041740439180168, + "eval_recall": 0.8966186252771619, + "eval_runtime": 8.369, + "eval_samples_per_second": 431.113, + "eval_steps_per_second": 3.465, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00011677843004711777, + "loss": 0.7099, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00010993322519194619, + "loss": 0.7104, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9097177141784047, + "eval_loss": 0.8031111359596252, + "eval_precision": 0.9096879473628462, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.8678, + "eval_samples_per_second": 458.577, + "eval_steps_per_second": 3.686, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00010294522799490218, + "loss": 0.7046, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9074279379157428, + "eval_f1": 0.9084074003366833, + "eval_loss": 0.8221327066421509, + "eval_precision": 0.9111762520225571, + "eval_recall": 0.9074279379157428, + "eval_runtime": 8.1433, + "eval_samples_per_second": 443.064, + "eval_steps_per_second": 3.561, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 9.586029913085337e-05, + "loss": 0.7043, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 8.872493541568735e-05, + "loss": 0.7003, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9144084295452984, + "eval_loss": 0.8041298389434814, + "eval_precision": 0.9128235296009194, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.994, + "eval_samples_per_second": 451.336, + "eval_steps_per_second": 3.628, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 8.158596465802749e-05, + "loss": 0.7019, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 7.44902383387165e-05, + "loss": 0.6957, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9073821660777006, + "eval_loss": 0.8143490552902222, + "eval_precision": 0.9078088718908383, + "eval_recall": 0.9115853658536586, + "eval_runtime": 8.2268, + "eval_samples_per_second": 438.567, + "eval_steps_per_second": 3.525, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 6.748432413494491e-05, + "loss": 0.7026, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9084677672852983, + "eval_loss": 0.8075430393218994, + "eval_precision": 0.9063369561026408, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.9842, + "eval_samples_per_second": 451.892, + "eval_steps_per_second": 3.632, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 6.061420030691932e-05, + "loss": 0.6961, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 5.392495395274434e-05, + "loss": 0.6917, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9125577382130095, + "eval_loss": 0.8113223314285278, + "eval_precision": 0.9109149564492655, + "eval_recall": 0.9171286031042128, + "eval_runtime": 8.1206, + "eval_samples_per_second": 444.302, + "eval_steps_per_second": 3.571, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 4.746048511180295e-05, + "loss": 0.6928, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9110430690962019, + "eval_loss": 0.8087641596794128, + "eval_precision": 0.9096169177781352, + "eval_recall": 0.9171286031042128, + "eval_runtime": 8.1405, + "eval_samples_per_second": 443.217, + "eval_steps_per_second": 3.562, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 4.1263218658540053e-05, + "loss": 0.6938, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 3.537382587742633e-05, + "loss": 0.6901, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9082344941955525, + "eval_loss": 0.8160849809646606, + "eval_precision": 0.9056215442680617, + "eval_recall": 0.914079822616408, + "eval_runtime": 8.0781, + "eval_samples_per_second": 446.638, + "eval_steps_per_second": 3.59, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.9830957546343734e-05, + "loss": 0.6916, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 2.4670990280107284e-05, + "loss": 0.6928, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9133354160066935, + "eval_loss": 0.803188681602478, + "eval_precision": 0.9120207234461367, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.0935, + "eval_samples_per_second": 445.792, + "eval_steps_per_second": 3.583, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.9927787798814757e-05, + "loss": 0.6919, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9218403547671841, + "eval_f1": 0.9160442687817897, + "eval_loss": 0.8060706257820129, + "eval_precision": 0.915626398388983, + "eval_recall": 0.9218403547671841, + "eval_runtime": 7.9183, + "eval_samples_per_second": 455.651, + "eval_steps_per_second": 3.662, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.5632478687766358e-05, + "loss": 0.6894, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.1813252107467344e-05, + "loss": 0.689, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9226718403547672, + "eval_f1": 0.918341863867805, + "eval_loss": 0.7993049025535583, + "eval_precision": 0.9176384019616962, + "eval_recall": 0.9226718403547672, + "eval_runtime": 8.0485, + "eval_samples_per_second": 448.281, + "eval_steps_per_second": 3.603, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 8.495172794423214e-06, + "loss": 0.6883, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 5.700016566836142e-06, + "loss": 0.6849, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9212860310421286, + "eval_f1": 0.9163821172906673, + "eval_loss": 0.803982138633728, + "eval_precision": 0.913621352577376, + "eval_recall": 0.9212860310421286, + "eval_runtime": 8.0497, + "eval_samples_per_second": 448.215, + "eval_steps_per_second": 3.603, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 3.446127414742639e-06, + "loss": 0.6877, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9131722064707715, + "eval_loss": 0.8063819408416748, + "eval_precision": 0.9118112026907484, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.1938, + "eval_samples_per_second": 440.331, + "eval_steps_per_second": 3.539, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.7482971124776433e-06, + "loss": 0.688, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 6.176681435420739e-07, + "loss": 0.6888, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9162525358604651, + "eval_loss": 0.7972683906555176, + "eval_precision": 0.9136954590326909, + "eval_recall": 0.9210088691796009, + "eval_runtime": 8.0166, + "eval_samples_per_second": 450.069, + "eval_steps_per_second": 3.618, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 6.166057495631731e-08, + "loss": 0.6907, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9139489550536453, + "eval_loss": 0.8069316148757935, + "eval_precision": 0.9126206599681234, + "eval_recall": 0.9190687361419069, + "eval_runtime": 8.4998, + "eval_samples_per_second": 424.48, + "eval_steps_per_second": 3.412, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.04884842792144779, + "learning_rate": 0.00017635062827075305, + "metric": "eval/loss", + "weight_decay": 0.07457026044587774 + } +} diff --git a/run-n97r4jfo/checkpoint-630/training_args.bin b/run-n97r4jfo/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5db540fbd2e53bf10b3e0bc33e6fcbc1ac928d4d --- /dev/null +++ b/run-n97r4jfo/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:946e922d5b635f3cc95ffb367e50486265cf9dddd2033baeccacdd3a246cce27 +size 4792 diff --git a/run-ni6jmlic/checkpoint-573/model.safetensors b/run-ni6jmlic/checkpoint-573/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b57ca00874a70862659ecf692895639e5684c190 --- /dev/null +++ b/run-ni6jmlic/checkpoint-573/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffcdeef3c4d962046d76169eb235803498667ba53cb538212ebf8941ba4229a6 +size 198025308 diff --git a/run-ni6jmlic/checkpoint-573/optimizer.pt b/run-ni6jmlic/checkpoint-573/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..27e372a78bfb5790107095ef88f4c18fcb1829d6 --- /dev/null +++ b/run-ni6jmlic/checkpoint-573/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:645983d4bce23ce656ba03a0fe43e73175608f30d64b93bbf13ed2cf5ea2c1e3 +size 395900602 diff --git a/run-ni6jmlic/checkpoint-573/rng_state.pth b/run-ni6jmlic/checkpoint-573/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e8713f9caaf617efce0d3935618a93ad2f5f391 --- /dev/null +++ b/run-ni6jmlic/checkpoint-573/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9102bb312b12c2313ea7585eb813beef5c548592778aaea4ab0516e14ecd38e5 +size 14244 diff --git a/run-ni6jmlic/checkpoint-573/scheduler.pt b/run-ni6jmlic/checkpoint-573/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9a02d4173fdbccc4a0d0304cbdcf245a6155bce --- /dev/null +++ b/run-ni6jmlic/checkpoint-573/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20c9dc48d4d207f57bc816ad2f7682e3ae02d2551bb9dadd7655256aba5eccd5 +size 1064 diff --git a/run-ni6jmlic/checkpoint-573/trainer_state.json b/run-ni6jmlic/checkpoint-573/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..16a25ca5be252f0d7a267f2446307517e4e960b2 --- /dev/null +++ b/run-ni6jmlic/checkpoint-573/trainer_state.json @@ -0,0 +1,616 @@ +{ + "best_metric": 0.9196817570794136, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-ni6jmlic/checkpoint-573", + "epoch": 26.96470588235294, + "eval_steps": 500, + "global_step": 573, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.1592863193870879e-05, + "loss": 1.4884, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8267738359201774, + "eval_f1": 0.7502207029646053, + "eval_loss": 1.1233329772949219, + "eval_precision": 0.6866426772896389, + "eval_recall": 0.8267738359201774, + "eval_runtime": 8.0207, + "eval_samples_per_second": 449.833, + "eval_steps_per_second": 3.616, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 2.3185726387741758e-05, + "loss": 1.203, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 3.4778589581612635e-05, + "loss": 0.9612, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8536585365853658, + "eval_f1": 0.8059112740657348, + "eval_loss": 0.9280069470405579, + "eval_precision": 0.8313159276328042, + "eval_recall": 0.8536585365853658, + "eval_runtime": 8.0899, + "eval_samples_per_second": 445.986, + "eval_steps_per_second": 3.585, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 4.6371452775483515e-05, + "loss": 0.8833, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8952328159645233, + "eval_f1": 0.8856025100035607, + "eval_loss": 0.8618199229240417, + "eval_precision": 0.8881992746578007, + "eval_recall": 0.8952328159645233, + "eval_runtime": 8.3928, + "eval_samples_per_second": 429.892, + "eval_steps_per_second": 3.455, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 5.79643159693544e-05, + "loss": 0.8509, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 6.955717916322527e-05, + "loss": 0.8107, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9054561095270746, + "eval_loss": 0.8151156902313232, + "eval_precision": 0.9018069601797669, + "eval_recall": 0.9115853658536586, + "eval_runtime": 8.2488, + "eval_samples_per_second": 437.396, + "eval_steps_per_second": 3.516, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 8.115004235709615e-05, + "loss": 0.8029, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 9.274290555096703e-05, + "loss": 0.787, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.878880266075388, + "eval_f1": 0.8834736851554738, + "eval_loss": 0.8554702997207642, + "eval_precision": 0.8949039425659839, + "eval_recall": 0.878880266075388, + "eval_runtime": 8.1824, + "eval_samples_per_second": 440.945, + "eval_steps_per_second": 3.544, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00010433576874483791, + "loss": 0.7706, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.8944901188588041, + "eval_loss": 0.8181613683700562, + "eval_precision": 0.8960353881501544, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.1496, + "eval_samples_per_second": 442.721, + "eval_steps_per_second": 3.558, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00011234413510642605, + "loss": 0.7743, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00011204646837726084, + "loss": 0.765, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9099066062141843, + "eval_loss": 0.807576596736908, + "eval_precision": 0.9082184204905329, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.1352, + "eval_samples_per_second": 443.502, + "eval_steps_per_second": 3.565, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00011138216765444314, + "loss": 0.7595, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00011035559259619894, + "loss": 0.7546, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8406319290465631, + "eval_f1": 0.8546578722830316, + "eval_loss": 0.9063465595245361, + "eval_precision": 0.886576775896147, + "eval_recall": 0.8406319290465631, + "eval_runtime": 8.3061, + "eval_samples_per_second": 434.38, + "eval_steps_per_second": 3.491, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00010897348038682383, + "loss": 0.7488, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9016632892656837, + "eval_loss": 0.8146984577178955, + "eval_precision": 0.9036364993138337, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.9707, + "eval_samples_per_second": 452.66, + "eval_steps_per_second": 3.638, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00010724490152203746, + "loss": 0.7449, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00010518120028133303, + "loss": 0.7381, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8941241685144125, + "eval_f1": 0.8958677489017312, + "eval_loss": 0.8317134976387024, + "eval_precision": 0.9030112154551658, + "eval_recall": 0.8941241685144125, + "eval_runtime": 8.3192, + "eval_samples_per_second": 433.698, + "eval_steps_per_second": 3.486, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00010279592027798844, + "loss": 0.7326, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.9061787936798691, + "eval_loss": 0.820725679397583, + "eval_precision": 0.9078551011459483, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.3526, + "eval_samples_per_second": 431.959, + "eval_steps_per_second": 3.472, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00010010471557533789, + "loss": 0.7317, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 9.712524795262801e-05, + "loss": 0.7254, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9055660907341075, + "eval_loss": 0.8079713582992554, + "eval_precision": 0.9012294135839911, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.4858, + "eval_samples_per_second": 425.182, + "eval_steps_per_second": 3.417, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 9.387707099468018e-05, + "loss": 0.7266, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 9.038150176605246e-05, + "loss": 0.7193, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8758314855875832, + "eval_f1": 0.883592145352654, + "eval_loss": 0.8608985543251038, + "eval_precision": 0.9005262488271947, + "eval_recall": 0.8758314855875832, + "eval_runtime": 8.5247, + "eval_samples_per_second": 423.243, + "eval_steps_per_second": 3.402, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 8.666148091187547e-05, + "loss": 0.7211, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9075217383334674, + "eval_loss": 0.8079153299331665, + "eval_precision": 0.9061862019389731, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.2608, + "eval_samples_per_second": 436.764, + "eval_steps_per_second": 3.511, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 8.274142210348934e-05, + "loss": 0.7137, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 7.864705181693634e-05, + "loss": 0.7143, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.912505455611038, + "eval_loss": 0.7998226881027222, + "eval_precision": 0.910795010022744, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.3882, + "eval_samples_per_second": 430.128, + "eval_steps_per_second": 3.457, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 7.440524049580779e-05, + "loss": 0.7136, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 7.004382620648559e-05, + "loss": 0.7137, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9077050997782705, + "eval_f1": 0.892159290072615, + "eval_loss": 0.8286277651786804, + "eval_precision": 0.9014757023748903, + "eval_recall": 0.9077050997782705, + "eval_runtime": 8.2082, + "eval_samples_per_second": 439.563, + "eval_steps_per_second": 3.533, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 6.559143194309035e-05, + "loss": 0.7092, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8944013303769401, + "eval_f1": 0.8977075703409133, + "eval_loss": 0.8341405391693115, + "eval_precision": 0.905135528349818, + "eval_recall": 0.8944013303769401, + "eval_runtime": 8.1515, + "eval_samples_per_second": 442.619, + "eval_steps_per_second": 3.558, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 6.107727778112271e-05, + "loss": 0.7082, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 5.653098911259225e-05, + "loss": 0.7065, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9126222252013378, + "eval_loss": 0.8034656047821045, + "eval_precision": 0.9096322734074553, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.1641, + "eval_samples_per_second": 441.933, + "eval_steps_per_second": 3.552, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 5.1982402221144044e-05, + "loss": 0.7083, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 4.746136847315013e-05, + "loss": 0.7003, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9085809401526197, + "eval_loss": 0.812218189239502, + "eval_precision": 0.9081865752546688, + "eval_recall": 0.9115853658536586, + "eval_runtime": 8.2182, + "eval_samples_per_second": 439.027, + "eval_steps_per_second": 3.529, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 4.2997558409816544e-05, + "loss": 0.708, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9091136129522319, + "eval_loss": 0.8089614510536194, + "eval_precision": 0.9072602723364859, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.418, + "eval_samples_per_second": 428.606, + "eval_steps_per_second": 3.445, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 3.862026702600555e-05, + "loss": 0.6999, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.435822151368554e-05, + "loss": 0.6973, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.913505315826131, + "eval_loss": 0.8085691332817078, + "eval_precision": 0.9114966851558827, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.4942, + "eval_samples_per_second": 424.761, + "eval_steps_per_second": 3.414, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.023939273174499e-05, + "loss": 0.7009, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.91066330383907, + "eval_loss": 0.8090089559555054, + "eval_precision": 0.909586307374987, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.3756, + "eval_samples_per_second": 430.777, + "eval_steps_per_second": 3.462, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.6290811639452693e-05, + "loss": 0.6986, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.2538391898270736e-05, + "loss": 0.6962, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9106257130556433, + "eval_loss": 0.8098848462104797, + "eval_precision": 0.909694886334761, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.5067, + "eval_samples_per_second": 424.136, + "eval_steps_per_second": 3.409, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.9006759806244882e-05, + "loss": 0.6982, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.5719092681075352e-05, + "loss": 0.6959, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.917060911649654, + "eval_loss": 0.8006868362426758, + "eval_precision": 0.9152968522820271, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.264, + "eval_samples_per_second": 436.593, + "eval_steps_per_second": 3.509, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.2696966752524273e-05, + "loss": 0.6953, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.914576036681704, + "eval_loss": 0.805885374546051, + "eval_precision": 0.9132934104849693, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.1538, + "eval_samples_per_second": 442.492, + "eval_steps_per_second": 3.557, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 9.960215562407737e-06, + "loss": 0.6956, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 7.526799801462221e-06, + "loss": 0.6935, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9158342153962471, + "eval_loss": 0.7976793646812439, + "eval_precision": 0.9145341154630037, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.1452, + "eval_samples_per_second": 442.961, + "eval_steps_per_second": 3.56, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 5.412689437317053e-06, + "loss": 0.6927, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.6317589071407394e-06, + "loss": 0.69, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9229490022172949, + "eval_f1": 0.9196817570794136, + "eval_loss": 0.80423903465271, + "eval_precision": 0.917770821456162, + "eval_recall": 0.9229490022172949, + "eval_runtime": 8.0091, + "eval_samples_per_second": 450.489, + "eval_steps_per_second": 3.621, + "step": 573 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.07869718688467646, + "learning_rate": 0.00011236159710982544, + "metric": "eval/loss", + "weight_decay": 0.03719869145858272 + } +} diff --git a/run-ni6jmlic/checkpoint-573/training_args.bin b/run-ni6jmlic/checkpoint-573/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2fb677b0034b49b491f29de9f7e67a81c14d4686 --- /dev/null +++ b/run-ni6jmlic/checkpoint-573/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ac82b21bd1fd572b600d0b29c6611e1df99e5693757d993cdb7939eca2da3ad +size 4792 diff --git a/run-ni6jmlic/checkpoint-630/model.safetensors b/run-ni6jmlic/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..01832139b34216bab078a2e4054c6b04beaf970e --- /dev/null +++ b/run-ni6jmlic/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d9458861f779361274f0cb39ac1ff70d974caed09c00725e91c4ab366e7c559 +size 198025308 diff --git a/run-ni6jmlic/checkpoint-630/optimizer.pt b/run-ni6jmlic/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae238de7240592b61efdaebc2bc3d5c444eb5ea4 --- /dev/null +++ b/run-ni6jmlic/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c670c3fcae1dd25fc0833824660c9a7e9fb1f2f100475041923a86c74adc2893 +size 395900602 diff --git a/run-ni6jmlic/checkpoint-630/rng_state.pth b/run-ni6jmlic/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-ni6jmlic/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-ni6jmlic/checkpoint-630/scheduler.pt b/run-ni6jmlic/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ff3ecdab8767fb1d9890d2fcaff0f9673003143 --- /dev/null +++ b/run-ni6jmlic/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74970584b6a06675225b80c0fdc83b04006fb889d261abf9cd71d880235869b1 +size 1064 diff --git a/run-ni6jmlic/checkpoint-630/trainer_state.json b/run-ni6jmlic/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d52d417659ac357e0952951a3fdd63ebf4789de0 --- /dev/null +++ b/run-ni6jmlic/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9196817570794136, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-ni6jmlic/checkpoint-573", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.1592863193870879e-05, + "loss": 1.4884, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8267738359201774, + "eval_f1": 0.7502207029646053, + "eval_loss": 1.1233329772949219, + "eval_precision": 0.6866426772896389, + "eval_recall": 0.8267738359201774, + "eval_runtime": 8.0207, + "eval_samples_per_second": 449.833, + "eval_steps_per_second": 3.616, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 2.3185726387741758e-05, + "loss": 1.203, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 3.4778589581612635e-05, + "loss": 0.9612, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8536585365853658, + "eval_f1": 0.8059112740657348, + "eval_loss": 0.9280069470405579, + "eval_precision": 0.8313159276328042, + "eval_recall": 0.8536585365853658, + "eval_runtime": 8.0899, + "eval_samples_per_second": 445.986, + "eval_steps_per_second": 3.585, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 4.6371452775483515e-05, + "loss": 0.8833, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8952328159645233, + "eval_f1": 0.8856025100035607, + "eval_loss": 0.8618199229240417, + "eval_precision": 0.8881992746578007, + "eval_recall": 0.8952328159645233, + "eval_runtime": 8.3928, + "eval_samples_per_second": 429.892, + "eval_steps_per_second": 3.455, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 5.79643159693544e-05, + "loss": 0.8509, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 6.955717916322527e-05, + "loss": 0.8107, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9054561095270746, + "eval_loss": 0.8151156902313232, + "eval_precision": 0.9018069601797669, + "eval_recall": 0.9115853658536586, + "eval_runtime": 8.2488, + "eval_samples_per_second": 437.396, + "eval_steps_per_second": 3.516, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 8.115004235709615e-05, + "loss": 0.8029, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 9.274290555096703e-05, + "loss": 0.787, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.878880266075388, + "eval_f1": 0.8834736851554738, + "eval_loss": 0.8554702997207642, + "eval_precision": 0.8949039425659839, + "eval_recall": 0.878880266075388, + "eval_runtime": 8.1824, + "eval_samples_per_second": 440.945, + "eval_steps_per_second": 3.544, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00010433576874483791, + "loss": 0.7706, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.8944901188588041, + "eval_loss": 0.8181613683700562, + "eval_precision": 0.8960353881501544, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.1496, + "eval_samples_per_second": 442.721, + "eval_steps_per_second": 3.558, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00011234413510642605, + "loss": 0.7743, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00011204646837726084, + "loss": 0.765, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9099066062141843, + "eval_loss": 0.807576596736908, + "eval_precision": 0.9082184204905329, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.1352, + "eval_samples_per_second": 443.502, + "eval_steps_per_second": 3.565, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00011138216765444314, + "loss": 0.7595, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00011035559259619894, + "loss": 0.7546, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8406319290465631, + "eval_f1": 0.8546578722830316, + "eval_loss": 0.9063465595245361, + "eval_precision": 0.886576775896147, + "eval_recall": 0.8406319290465631, + "eval_runtime": 8.3061, + "eval_samples_per_second": 434.38, + "eval_steps_per_second": 3.491, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00010897348038682383, + "loss": 0.7488, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9016632892656837, + "eval_loss": 0.8146984577178955, + "eval_precision": 0.9036364993138337, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.9707, + "eval_samples_per_second": 452.66, + "eval_steps_per_second": 3.638, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00010724490152203746, + "loss": 0.7449, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00010518120028133303, + "loss": 0.7381, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8941241685144125, + "eval_f1": 0.8958677489017312, + "eval_loss": 0.8317134976387024, + "eval_precision": 0.9030112154551658, + "eval_recall": 0.8941241685144125, + "eval_runtime": 8.3192, + "eval_samples_per_second": 433.698, + "eval_steps_per_second": 3.486, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00010279592027798844, + "loss": 0.7326, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.9061787936798691, + "eval_loss": 0.820725679397583, + "eval_precision": 0.9078551011459483, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.3526, + "eval_samples_per_second": 431.959, + "eval_steps_per_second": 3.472, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00010010471557533789, + "loss": 0.7317, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 9.712524795262801e-05, + "loss": 0.7254, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9055660907341075, + "eval_loss": 0.8079713582992554, + "eval_precision": 0.9012294135839911, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.4858, + "eval_samples_per_second": 425.182, + "eval_steps_per_second": 3.417, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 9.387707099468018e-05, + "loss": 0.7266, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 9.038150176605246e-05, + "loss": 0.7193, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8758314855875832, + "eval_f1": 0.883592145352654, + "eval_loss": 0.8608985543251038, + "eval_precision": 0.9005262488271947, + "eval_recall": 0.8758314855875832, + "eval_runtime": 8.5247, + "eval_samples_per_second": 423.243, + "eval_steps_per_second": 3.402, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 8.666148091187547e-05, + "loss": 0.7211, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9075217383334674, + "eval_loss": 0.8079153299331665, + "eval_precision": 0.9061862019389731, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.2608, + "eval_samples_per_second": 436.764, + "eval_steps_per_second": 3.511, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 8.274142210348934e-05, + "loss": 0.7137, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 7.864705181693634e-05, + "loss": 0.7143, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.912505455611038, + "eval_loss": 0.7998226881027222, + "eval_precision": 0.910795010022744, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.3882, + "eval_samples_per_second": 430.128, + "eval_steps_per_second": 3.457, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 7.440524049580779e-05, + "loss": 0.7136, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 7.004382620648559e-05, + "loss": 0.7137, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9077050997782705, + "eval_f1": 0.892159290072615, + "eval_loss": 0.8286277651786804, + "eval_precision": 0.9014757023748903, + "eval_recall": 0.9077050997782705, + "eval_runtime": 8.2082, + "eval_samples_per_second": 439.563, + "eval_steps_per_second": 3.533, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 6.559143194309035e-05, + "loss": 0.7092, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8944013303769401, + "eval_f1": 0.8977075703409133, + "eval_loss": 0.8341405391693115, + "eval_precision": 0.905135528349818, + "eval_recall": 0.8944013303769401, + "eval_runtime": 8.1515, + "eval_samples_per_second": 442.619, + "eval_steps_per_second": 3.558, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 6.107727778112271e-05, + "loss": 0.7082, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 5.653098911259225e-05, + "loss": 0.7065, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9126222252013378, + "eval_loss": 0.8034656047821045, + "eval_precision": 0.9096322734074553, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.1641, + "eval_samples_per_second": 441.933, + "eval_steps_per_second": 3.552, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 5.1982402221144044e-05, + "loss": 0.7083, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 4.746136847315013e-05, + "loss": 0.7003, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9085809401526197, + "eval_loss": 0.812218189239502, + "eval_precision": 0.9081865752546688, + "eval_recall": 0.9115853658536586, + "eval_runtime": 8.2182, + "eval_samples_per_second": 439.027, + "eval_steps_per_second": 3.529, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 4.2997558409816544e-05, + "loss": 0.708, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9091136129522319, + "eval_loss": 0.8089614510536194, + "eval_precision": 0.9072602723364859, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.418, + "eval_samples_per_second": 428.606, + "eval_steps_per_second": 3.445, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 3.862026702600555e-05, + "loss": 0.6999, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.435822151368554e-05, + "loss": 0.6973, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.913505315826131, + "eval_loss": 0.8085691332817078, + "eval_precision": 0.9114966851558827, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.4942, + "eval_samples_per_second": 424.761, + "eval_steps_per_second": 3.414, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.023939273174499e-05, + "loss": 0.7009, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.91066330383907, + "eval_loss": 0.8090089559555054, + "eval_precision": 0.909586307374987, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.3756, + "eval_samples_per_second": 430.777, + "eval_steps_per_second": 3.462, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.6290811639452693e-05, + "loss": 0.6986, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.2538391898270736e-05, + "loss": 0.6962, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9106257130556433, + "eval_loss": 0.8098848462104797, + "eval_precision": 0.909694886334761, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.5067, + "eval_samples_per_second": 424.136, + "eval_steps_per_second": 3.409, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.9006759806244882e-05, + "loss": 0.6982, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.5719092681075352e-05, + "loss": 0.6959, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.917060911649654, + "eval_loss": 0.8006868362426758, + "eval_precision": 0.9152968522820271, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.264, + "eval_samples_per_second": 436.593, + "eval_steps_per_second": 3.509, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.2696966752524273e-05, + "loss": 0.6953, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.914576036681704, + "eval_loss": 0.805885374546051, + "eval_precision": 0.9132934104849693, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.1538, + "eval_samples_per_second": 442.492, + "eval_steps_per_second": 3.557, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 9.960215562407737e-06, + "loss": 0.6956, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 7.526799801462221e-06, + "loss": 0.6935, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9158342153962471, + "eval_loss": 0.7976793646812439, + "eval_precision": 0.9145341154630037, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.1452, + "eval_samples_per_second": 442.961, + "eval_steps_per_second": 3.56, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 5.412689437317053e-06, + "loss": 0.6927, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.6317589071407394e-06, + "loss": 0.69, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9229490022172949, + "eval_f1": 0.9196817570794136, + "eval_loss": 0.80423903465271, + "eval_precision": 0.917770821456162, + "eval_recall": 0.9229490022172949, + "eval_runtime": 8.0091, + "eval_samples_per_second": 450.489, + "eval_steps_per_second": 3.621, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.195696062788874e-06, + "loss": 0.6954, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9122935470114186, + "eval_loss": 0.8046474456787109, + "eval_precision": 0.9103427151627836, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.0267, + "eval_samples_per_second": 449.502, + "eval_steps_per_second": 3.613, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.1139254660260434e-06, + "loss": 0.6945, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 3.9354653721842376e-07, + "loss": 0.6944, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9226718403547672, + "eval_f1": 0.9180301113842996, + "eval_loss": 0.7997339367866516, + "eval_precision": 0.9152194916044069, + "eval_recall": 0.9226718403547672, + "eval_runtime": 8.0846, + "eval_samples_per_second": 446.282, + "eval_steps_per_second": 3.587, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 3.928696341339285e-08, + "loss": 0.6952, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9148587513682961, + "eval_loss": 0.8069359660148621, + "eval_precision": 0.9142263647369346, + "eval_recall": 0.9196230598669624, + "eval_runtime": 8.2366, + "eval_samples_per_second": 438.046, + "eval_steps_per_second": 3.521, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.07869718688467646, + "learning_rate": 0.00011236159710982544, + "metric": "eval/loss", + "weight_decay": 0.03719869145858272 + } +} diff --git a/run-ni6jmlic/checkpoint-630/training_args.bin b/run-ni6jmlic/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2fb677b0034b49b491f29de9f7e67a81c14d4686 --- /dev/null +++ b/run-ni6jmlic/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ac82b21bd1fd572b600d0b29c6611e1df99e5693757d993cdb7939eca2da3ad +size 4792 diff --git a/run-niq6yxyt/checkpoint-1105/model.safetensors b/run-niq6yxyt/checkpoint-1105/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e46b1d595e7deefa4449b3ae9e31b6d9b546034e --- /dev/null +++ b/run-niq6yxyt/checkpoint-1105/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40907479eebb010411fe38aa6e8032003e4ed6614f229d841d652cf9c3e0a5d3 +size 198025308 diff --git a/run-niq6yxyt/checkpoint-1105/optimizer.pt b/run-niq6yxyt/checkpoint-1105/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2412b3edb540b34858c0d5578668702b143f1bc --- /dev/null +++ b/run-niq6yxyt/checkpoint-1105/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e587835460fa1bfe4514c0de37fcbe9df48b5ba36bada665136da24eb0e5c88 +size 395900602 diff --git a/run-niq6yxyt/checkpoint-1105/rng_state.pth b/run-niq6yxyt/checkpoint-1105/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bfd69dc3eaa0847aabf0337c1ad85f5c3279ee47 --- /dev/null +++ b/run-niq6yxyt/checkpoint-1105/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6f99f80df2e12a39424bcc4a26062a4e6cfd833bf0e3008f84ee50071a2b760 +size 14244 diff --git a/run-niq6yxyt/checkpoint-1105/scheduler.pt b/run-niq6yxyt/checkpoint-1105/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..64adfac4a7abbfa9241375352151569ed13dbbfb --- /dev/null +++ b/run-niq6yxyt/checkpoint-1105/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:facdcd2841d8b7b41c1f01eb8be545c657c23d60f2bb9a143ebfd96ca9b58fa7 +size 1064 diff --git a/run-niq6yxyt/checkpoint-1105/trainer_state.json b/run-niq6yxyt/checkpoint-1105/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e04883b7d589b7039f4aa4421f9de962e16e43c7 --- /dev/null +++ b/run-niq6yxyt/checkpoint-1105/trainer_state.json @@ -0,0 +1,513 @@ +{ + "best_metric": 0.9237804878048781, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-niq6yxyt/checkpoint-1105", + "epoch": 26.0, + "eval_steps": 500, + "global_step": 1105, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.704277745971892e-05, + "loss": 1.2895, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8738913525498891, + "eval_loss": 0.9118460416793823, + "eval_runtime": 7.02, + "eval_samples_per_second": 513.957, + "eval_steps_per_second": 8.12, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 7.408555491943784e-05, + "loss": 0.9155, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011112833237915676, + "loss": 0.8266, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8188580870628357, + "eval_runtime": 6.8362, + "eval_samples_per_second": 527.781, + "eval_steps_per_second": 8.338, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00014817110983887568, + "loss": 0.807, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.899390243902439, + "eval_loss": 0.8334592580795288, + "eval_runtime": 7.2929, + "eval_samples_per_second": 494.731, + "eval_steps_per_second": 7.816, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001852138872985946, + "loss": 0.7965, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00022225666475831352, + "loss": 0.7822, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8877494456762749, + "eval_loss": 0.8614720106124878, + "eval_runtime": 6.7904, + "eval_samples_per_second": 531.34, + "eval_steps_per_second": 8.394, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00025929944221803243, + "loss": 0.78, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00029634221967775136, + "loss": 0.7759, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8991130820399114, + "eval_loss": 0.8301498293876648, + "eval_runtime": 6.9898, + "eval_samples_per_second": 516.177, + "eval_steps_per_second": 8.155, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00032767391452942816, + "loss": 0.7795, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8356430155210643, + "eval_loss": 0.9038041234016418, + "eval_runtime": 7.2347, + "eval_samples_per_second": 498.708, + "eval_steps_per_second": 7.879, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0003270006787788003, + "loss": 0.7774, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0003253019050395, + "loss": 0.7656, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8603104212860311, + "eval_loss": 0.8784758448600769, + "eval_runtime": 6.8451, + "eval_samples_per_second": 527.091, + "eval_steps_per_second": 8.327, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00032258827106567913, + "loss": 0.7647, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00031887683358229206, + "loss": 0.7543, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8982815964523282, + "eval_loss": 0.8412770628929138, + "eval_runtime": 6.9021, + "eval_samples_per_second": 522.743, + "eval_steps_per_second": 8.258, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00031419092107392796, + "loss": 0.7438, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8996674057649667, + "eval_loss": 0.8318120241165161, + "eval_runtime": 6.8586, + "eval_samples_per_second": 526.056, + "eval_steps_per_second": 8.311, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0003085599871520855, + "loss": 0.7488, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0003020194254225597, + "loss": 0.7496, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8264697194099426, + "eval_runtime": 6.9641, + "eval_samples_per_second": 518.089, + "eval_steps_per_second": 8.185, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.000294610347016602, + "loss": 0.7465, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9093680709534369, + "eval_loss": 0.818407416343689, + "eval_runtime": 6.8994, + "eval_samples_per_second": 522.946, + "eval_steps_per_second": 8.262, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0002863793221841998, + "loss": 0.7332, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0002773780875737046, + "loss": 0.7332, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8999445676274944, + "eval_loss": 0.8397746086120605, + "eval_runtime": 6.8749, + "eval_samples_per_second": 524.807, + "eval_steps_per_second": 8.291, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00026766322103772324, + "loss": 0.7353, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0002572957860093017, + "loss": 0.7303, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9077050997782705, + "eval_loss": 0.8242444396018982, + "eval_runtime": 6.752, + "eval_samples_per_second": 534.362, + "eval_steps_per_second": 8.442, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00024634094768369736, + "loss": 0.7266, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8199712038040161, + "eval_runtime": 6.7981, + "eval_samples_per_second": 530.733, + "eval_steps_per_second": 8.385, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00023486756341825488, + "loss": 0.7318, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0002229477499249547, + "loss": 0.7142, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9065964523281597, + "eval_loss": 0.8265474438667297, + "eval_runtime": 7.2154, + "eval_samples_per_second": 500.041, + "eval_steps_per_second": 7.9, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0002106564299760729, + "loss": 0.7212, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0001980708614721648, + "loss": 0.7169, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9093680709534369, + "eval_loss": 0.8175589442253113, + "eval_runtime": 6.7568, + "eval_samples_per_second": 533.978, + "eval_steps_per_second": 8.436, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00018527015183244586, + "loss": 0.7164, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9082594235033259, + "eval_loss": 0.8175287842750549, + "eval_runtime": 6.9142, + "eval_samples_per_second": 521.823, + "eval_steps_per_second": 8.244, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00017233476075990115, + "loss": 0.7102, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00015934599450652666, + "loss": 0.709, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.810811460018158, + "eval_runtime": 7.0085, + "eval_samples_per_second": 514.806, + "eval_steps_per_second": 8.133, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00014638549481753055, + "loss": 0.7037, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00013353472576677013, + "loss": 0.7044, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8139188289642334, + "eval_runtime": 7.0277, + "eval_samples_per_second": 513.397, + "eval_steps_per_second": 8.111, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00012087446170895358, + "loss": 0.7085, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8100966215133667, + "eval_runtime": 6.5613, + "eval_samples_per_second": 549.889, + "eval_steps_per_second": 8.687, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00010848427956711692, + "loss": 0.6988, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 9.64420586466363e-05, + "loss": 0.6978, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8187349438667297, + "eval_runtime": 6.8146, + "eval_samples_per_second": 529.449, + "eval_steps_per_second": 8.364, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 8.48234911197283e-05, + "loss": 0.6927, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8153396248817444, + "eval_runtime": 6.6993, + "eval_samples_per_second": 538.566, + "eval_steps_per_second": 8.508, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 7.370160625731931e-05, + "loss": 0.6984, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 6.314631139875598e-05, + "loss": 0.6963, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9226718403547672, + "eval_loss": 0.8042484521865845, + "eval_runtime": 6.7002, + "eval_samples_per_second": 538.492, + "eval_steps_per_second": 8.507, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 5.3223952544623184e-05, + "loss": 0.6966, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 4.399689733458969e-05, + "loss": 0.689, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.8020826578140259, + "eval_runtime": 6.8091, + "eval_samples_per_second": 529.875, + "eval_steps_per_second": 8.371, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 3.552314303150438e-05, + "loss": 0.6856, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8099854588508606, + "eval_runtime": 6.9109, + "eval_samples_per_second": 522.071, + "eval_steps_per_second": 8.248, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 2.7855951975783075e-05, + "loss": 0.6878, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.1043516801462337e-05, + "loss": 0.6874, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9237804878048781, + "eval_loss": 0.7999135851860046, + "eval_runtime": 6.7338, + "eval_samples_per_second": 535.807, + "eval_steps_per_second": 8.465, + "step": 1105 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00032768610829751353, + "metric": "eval/loss", + "warmup_ratio": 0.18185993284348856 + } +} diff --git a/run-niq6yxyt/checkpoint-1105/training_args.bin b/run-niq6yxyt/checkpoint-1105/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..94f129706113adb1dbea63ece6578270f211ff6a --- /dev/null +++ b/run-niq6yxyt/checkpoint-1105/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fefef9cd36176dfa2fd010099052e9432a91318acfff8d136bb28d7f54d7d587 +size 4792 diff --git a/run-niq6yxyt/checkpoint-1260/model.safetensors b/run-niq6yxyt/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f11dce786bb1c0607a124d34a165e7b9be96fa9 --- /dev/null +++ b/run-niq6yxyt/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf34c58fde49c1026bb210d5c21884e7f221d90f62f3c79cb0b0323fd92f9b70 +size 198025308 diff --git a/run-niq6yxyt/checkpoint-1260/optimizer.pt b/run-niq6yxyt/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a41a3672353c81180c3358d5cedf65eef6b40178 --- /dev/null +++ b/run-niq6yxyt/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7ae4d4f4cc6aec0024c20815534310724dfd650160f7d61628f21f0966f4d4d +size 395900602 diff --git a/run-niq6yxyt/checkpoint-1260/rng_state.pth b/run-niq6yxyt/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-niq6yxyt/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-niq6yxyt/checkpoint-1260/scheduler.pt b/run-niq6yxyt/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..85e88a602acce28801723047636cb33e6d5a0f63 --- /dev/null +++ b/run-niq6yxyt/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e40099f8fe9bf4c8d2b22a1f581436f111cb4f174df037f670f67cf454c9aec5 +size 1064 diff --git a/run-niq6yxyt/checkpoint-1260/trainer_state.json b/run-niq6yxyt/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bb729c3e424a086fe22ac7214e41806d32ea85d8 --- /dev/null +++ b/run-niq6yxyt/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9237804878048781, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-niq6yxyt/checkpoint-1105", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.704277745971892e-05, + "loss": 1.2895, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8738913525498891, + "eval_loss": 0.9118460416793823, + "eval_runtime": 7.02, + "eval_samples_per_second": 513.957, + "eval_steps_per_second": 8.12, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 7.408555491943784e-05, + "loss": 0.9155, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011112833237915676, + "loss": 0.8266, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8188580870628357, + "eval_runtime": 6.8362, + "eval_samples_per_second": 527.781, + "eval_steps_per_second": 8.338, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00014817110983887568, + "loss": 0.807, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.899390243902439, + "eval_loss": 0.8334592580795288, + "eval_runtime": 7.2929, + "eval_samples_per_second": 494.731, + "eval_steps_per_second": 7.816, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001852138872985946, + "loss": 0.7965, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00022225666475831352, + "loss": 0.7822, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8877494456762749, + "eval_loss": 0.8614720106124878, + "eval_runtime": 6.7904, + "eval_samples_per_second": 531.34, + "eval_steps_per_second": 8.394, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00025929944221803243, + "loss": 0.78, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00029634221967775136, + "loss": 0.7759, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8991130820399114, + "eval_loss": 0.8301498293876648, + "eval_runtime": 6.9898, + "eval_samples_per_second": 516.177, + "eval_steps_per_second": 8.155, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00032767391452942816, + "loss": 0.7795, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8356430155210643, + "eval_loss": 0.9038041234016418, + "eval_runtime": 7.2347, + "eval_samples_per_second": 498.708, + "eval_steps_per_second": 7.879, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0003270006787788003, + "loss": 0.7774, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0003253019050395, + "loss": 0.7656, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8603104212860311, + "eval_loss": 0.8784758448600769, + "eval_runtime": 6.8451, + "eval_samples_per_second": 527.091, + "eval_steps_per_second": 8.327, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00032258827106567913, + "loss": 0.7647, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00031887683358229206, + "loss": 0.7543, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8982815964523282, + "eval_loss": 0.8412770628929138, + "eval_runtime": 6.9021, + "eval_samples_per_second": 522.743, + "eval_steps_per_second": 8.258, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00031419092107392796, + "loss": 0.7438, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8996674057649667, + "eval_loss": 0.8318120241165161, + "eval_runtime": 6.8586, + "eval_samples_per_second": 526.056, + "eval_steps_per_second": 8.311, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0003085599871520855, + "loss": 0.7488, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0003020194254225597, + "loss": 0.7496, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8264697194099426, + "eval_runtime": 6.9641, + "eval_samples_per_second": 518.089, + "eval_steps_per_second": 8.185, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.000294610347016602, + "loss": 0.7465, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9093680709534369, + "eval_loss": 0.818407416343689, + "eval_runtime": 6.8994, + "eval_samples_per_second": 522.946, + "eval_steps_per_second": 8.262, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0002863793221841998, + "loss": 0.7332, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0002773780875737046, + "loss": 0.7332, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8999445676274944, + "eval_loss": 0.8397746086120605, + "eval_runtime": 6.8749, + "eval_samples_per_second": 524.807, + "eval_steps_per_second": 8.291, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00026766322103772324, + "loss": 0.7353, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0002572957860093017, + "loss": 0.7303, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9077050997782705, + "eval_loss": 0.8242444396018982, + "eval_runtime": 6.752, + "eval_samples_per_second": 534.362, + "eval_steps_per_second": 8.442, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00024634094768369736, + "loss": 0.7266, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8199712038040161, + "eval_runtime": 6.7981, + "eval_samples_per_second": 530.733, + "eval_steps_per_second": 8.385, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00023486756341825488, + "loss": 0.7318, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0002229477499249547, + "loss": 0.7142, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9065964523281597, + "eval_loss": 0.8265474438667297, + "eval_runtime": 7.2154, + "eval_samples_per_second": 500.041, + "eval_steps_per_second": 7.9, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0002106564299760729, + "loss": 0.7212, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0001980708614721648, + "loss": 0.7169, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9093680709534369, + "eval_loss": 0.8175589442253113, + "eval_runtime": 6.7568, + "eval_samples_per_second": 533.978, + "eval_steps_per_second": 8.436, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00018527015183244586, + "loss": 0.7164, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9082594235033259, + "eval_loss": 0.8175287842750549, + "eval_runtime": 6.9142, + "eval_samples_per_second": 521.823, + "eval_steps_per_second": 8.244, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00017233476075990115, + "loss": 0.7102, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00015934599450652666, + "loss": 0.709, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.810811460018158, + "eval_runtime": 7.0085, + "eval_samples_per_second": 514.806, + "eval_steps_per_second": 8.133, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00014638549481753055, + "loss": 0.7037, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00013353472576677013, + "loss": 0.7044, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8139188289642334, + "eval_runtime": 7.0277, + "eval_samples_per_second": 513.397, + "eval_steps_per_second": 8.111, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00012087446170895358, + "loss": 0.7085, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8100966215133667, + "eval_runtime": 6.5613, + "eval_samples_per_second": 549.889, + "eval_steps_per_second": 8.687, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00010848427956711692, + "loss": 0.6988, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 9.64420586466363e-05, + "loss": 0.6978, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8187349438667297, + "eval_runtime": 6.8146, + "eval_samples_per_second": 529.449, + "eval_steps_per_second": 8.364, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 8.48234911197283e-05, + "loss": 0.6927, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8153396248817444, + "eval_runtime": 6.6993, + "eval_samples_per_second": 538.566, + "eval_steps_per_second": 8.508, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 7.370160625731931e-05, + "loss": 0.6984, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 6.314631139875598e-05, + "loss": 0.6963, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9226718403547672, + "eval_loss": 0.8042484521865845, + "eval_runtime": 6.7002, + "eval_samples_per_second": 538.492, + "eval_steps_per_second": 8.507, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 5.3223952544623184e-05, + "loss": 0.6966, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 4.399689733458969e-05, + "loss": 0.689, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.8020826578140259, + "eval_runtime": 6.8091, + "eval_samples_per_second": 529.875, + "eval_steps_per_second": 8.371, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 3.552314303150438e-05, + "loss": 0.6856, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8099854588508606, + "eval_runtime": 6.9109, + "eval_samples_per_second": 522.071, + "eval_steps_per_second": 8.248, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 2.7855951975783075e-05, + "loss": 0.6878, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.1043516801462337e-05, + "loss": 0.6874, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9237804878048781, + "eval_loss": 0.7999135851860046, + "eval_runtime": 6.7338, + "eval_samples_per_second": 535.807, + "eval_steps_per_second": 8.465, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 1.5128657518225842e-05, + "loss": 0.6882, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.0148552363415356e-05, + "loss": 0.6915, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.804320752620697, + "eval_runtime": 6.8425, + "eval_samples_per_second": 527.294, + "eval_steps_per_second": 8.33, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 6.134504115772704e-06, + "loss": 0.6884, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8091490268707275, + "eval_runtime": 6.9681, + "eval_samples_per_second": 517.785, + "eval_steps_per_second": 8.18, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 3.1117433397637357e-06, + "loss": 0.6811, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.0992697972046642e-06, + "loss": 0.6874, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9210088691796009, + "eval_loss": 0.8055908679962158, + "eval_runtime": 6.3905, + "eval_samples_per_second": 564.589, + "eval_steps_per_second": 8.92, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 1.0973302300853166e-07, + "loss": 0.688, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9226718403547672, + "eval_loss": 0.8016838431358337, + "eval_runtime": 6.9288, + "eval_samples_per_second": 520.723, + "eval_steps_per_second": 8.227, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00032768610829751353, + "metric": "eval/loss", + "warmup_ratio": 0.18185993284348856 + } +} diff --git a/run-niq6yxyt/checkpoint-1260/training_args.bin b/run-niq6yxyt/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..94f129706113adb1dbea63ece6578270f211ff6a --- /dev/null +++ b/run-niq6yxyt/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fefef9cd36176dfa2fd010099052e9432a91318acfff8d136bb28d7f54d7d587 +size 4792 diff --git a/run-np97sxtx/checkpoint-616/model.safetensors b/run-np97sxtx/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0eac5989fd69d24836dd6026f52c69c1a0144285 --- /dev/null +++ b/run-np97sxtx/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43139f9f0432b51be4b09fabfc30e630ab906ded189906090ec7a3a12d7cd2b8 +size 198025308 diff --git a/run-np97sxtx/checkpoint-616/optimizer.pt b/run-np97sxtx/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..36dfc37a095ef6f708ca97812b06a15fba2d2f2c --- /dev/null +++ b/run-np97sxtx/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f6679f65a3396781749d9aa6e21a231a3bd0772aeab5760ad29085f11bf20bc +size 395900602 diff --git a/run-np97sxtx/checkpoint-616/rng_state.pth b/run-np97sxtx/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-np97sxtx/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-np97sxtx/checkpoint-616/scheduler.pt b/run-np97sxtx/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..380f99bc34d9beae7d6be65ce6266a7931909053 --- /dev/null +++ b/run-np97sxtx/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa48c46dd8aee688b34027c9f8900fdceadc0cd7d376e0c9dcc3204812b3de2c +size 1064 diff --git a/run-np97sxtx/checkpoint-616/trainer_state.json b/run-np97sxtx/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..96c05d7a83a2e76b4ae6c744d8a63d49cccba610 --- /dev/null +++ b/run-np97sxtx/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9197497326060817, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-np97sxtx/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.5421877357377304e-05, + "loss": 1.4827, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8281596452328159, + "eval_f1": 0.750566761552725, + "eval_loss": 1.0604153871536255, + "eval_precision": 0.6862681396176564, + "eval_recall": 0.8281596452328159, + "eval_runtime": 8.1608, + "eval_samples_per_second": 442.113, + "eval_steps_per_second": 3.554, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.084375471475461e-05, + "loss": 1.1445, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 4.626563207213191e-05, + "loss": 0.9462, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.871119733924612, + "eval_f1": 0.837812194732652, + "eval_loss": 0.9149242043495178, + "eval_precision": 0.8627963579904606, + "eval_recall": 0.871119733924612, + "eval_runtime": 7.9189, + "eval_samples_per_second": 455.62, + "eval_steps_per_second": 3.662, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 6.168750942950922e-05, + "loss": 0.874, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9082594235033259, + "eval_f1": 0.9013916307527915, + "eval_loss": 0.8876226544380188, + "eval_precision": 0.8997638438567944, + "eval_recall": 0.9082594235033259, + "eval_runtime": 7.6742, + "eval_samples_per_second": 470.144, + "eval_steps_per_second": 3.779, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 7.710938678688652e-05, + "loss": 0.8324, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 9.253126414426382e-05, + "loss": 0.8033, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9052814539518259, + "eval_loss": 0.805810809135437, + "eval_precision": 0.901509718560917, + "eval_recall": 0.9129711751662971, + "eval_runtime": 7.8908, + "eval_samples_per_second": 457.243, + "eval_steps_per_second": 3.675, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00010795314150164112, + "loss": 0.7996, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00011261966010178432, + "loss": 0.7747, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.9051934276749202, + "eval_loss": 0.8114230036735535, + "eval_precision": 0.9057106266216075, + "eval_recall": 0.907150776053215, + "eval_runtime": 7.6766, + "eval_samples_per_second": 470.0, + "eval_steps_per_second": 3.778, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011222877467887107, + "loss": 0.7776, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9118414344309512, + "eval_loss": 0.800033688545227, + "eval_precision": 0.9086836473847006, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.7928, + "eval_samples_per_second": 462.994, + "eval_steps_per_second": 3.721, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00011151241149525345, + "loss": 0.771, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00011047474309787043, + "loss": 0.7648, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9099530088245646, + "eval_loss": 0.8006107211112976, + "eval_precision": 0.9075033650423671, + "eval_recall": 0.9185144124168514, + "eval_runtime": 8.2369, + "eval_samples_per_second": 438.029, + "eval_steps_per_second": 3.521, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00010912181351605538, + "loss": 0.7602, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00010746150305733154, + "loss": 0.7513, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.9084232257941697, + "eval_loss": 0.8087606430053711, + "eval_precision": 0.9066563374349289, + "eval_recall": 0.9113082039911308, + "eval_runtime": 7.9724, + "eval_samples_per_second": 452.561, + "eval_steps_per_second": 3.638, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0001055034824075754, + "loss": 0.7447, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.908247685543376, + "eval_loss": 0.8052621483802795, + "eval_precision": 0.9060397273284067, + "eval_recall": 0.9113082039911308, + "eval_runtime": 7.6765, + "eval_samples_per_second": 470.004, + "eval_steps_per_second": 3.778, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00010325915630289765, + "loss": 0.7466, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00010074159710133152, + "loss": 0.7398, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9232261640798226, + "eval_f1": 0.9160020914097167, + "eval_loss": 0.7962931394577026, + "eval_precision": 0.9133160706080692, + "eval_recall": 0.9232261640798226, + "eval_runtime": 7.5107, + "eval_samples_per_second": 480.383, + "eval_steps_per_second": 3.861, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 9.796546864124887e-05, + "loss": 0.7322, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9021618625277162, + "eval_f1": 0.9038516552537947, + "eval_loss": 0.8248952627182007, + "eval_precision": 0.9087700718264768, + "eval_recall": 0.9021618625277162, + "eval_runtime": 7.8544, + "eval_samples_per_second": 459.363, + "eval_steps_per_second": 3.692, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 9.494694083e-05, + "loss": 0.7412, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 9.170359546026635e-05, + "loss": 0.7287, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9110349958727643, + "eval_loss": 0.8089420795440674, + "eval_precision": 0.9116017638012016, + "eval_recall": 0.9118625277161863, + "eval_runtime": 7.7437, + "eval_samples_per_second": 465.929, + "eval_steps_per_second": 3.745, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 8.825432380271054e-05, + "loss": 0.723, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 8.461921657140817e-05, + "loss": 0.7215, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9047829729440033, + "eval_loss": 0.8163162469863892, + "eval_precision": 0.9031137890314911, + "eval_recall": 0.9124168514412417, + "eval_runtime": 8.2907, + "eval_samples_per_second": 435.185, + "eval_steps_per_second": 3.498, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 8.081944690297147e-05, + "loss": 0.7175, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.8994529072861367, + "eval_loss": 0.8219234943389893, + "eval_precision": 0.9005721426438295, + "eval_recall": 0.9099223946784922, + "eval_runtime": 7.9364, + "eval_samples_per_second": 454.612, + "eval_steps_per_second": 3.654, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 7.687714703096771e-05, + "loss": 0.7189, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 7.281527937395709e-05, + "loss": 0.7163, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9039949715588392, + "eval_loss": 0.8146753311157227, + "eval_precision": 0.9062112838371414, + "eval_recall": 0.9115853658536586, + "eval_runtime": 7.5427, + "eval_samples_per_second": 478.343, + "eval_steps_per_second": 3.845, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 6.865750278801464e-05, + "loss": 0.715, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 6.442803476276478e-05, + "loss": 0.7182, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.9055299350831074, + "eval_loss": 0.8154621720314026, + "eval_precision": 0.9079904921572199, + "eval_recall": 0.9068736141906873, + "eval_runtime": 7.5328, + "eval_samples_per_second": 478.969, + "eval_steps_per_second": 3.85, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 6.0151510363584565e-05, + "loss": 0.7064, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9169724758288114, + "eval_loss": 0.8043838143348694, + "eval_precision": 0.9164616901241153, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.024, + "eval_samples_per_second": 449.649, + "eval_steps_per_second": 3.614, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 5.585283874158511e-05, + "loss": 0.7116, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 5.155705804714616e-05, + "loss": 0.7077, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9161401683691142, + "eval_loss": 0.8005483150482178, + "eval_precision": 0.9142362720862974, + "eval_recall": 0.9193458980044346, + "eval_runtime": 7.3733, + "eval_samples_per_second": 489.333, + "eval_steps_per_second": 3.933, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 4.728918959207841e-05, + "loss": 0.7059, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 4.307409210986412e-05, + "loss": 0.7044, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9106094208389661, + "eval_loss": 0.8073698878288269, + "eval_precision": 0.9069065100790249, + "eval_recall": 0.9154656319290465, + "eval_runtime": 8.1848, + "eval_samples_per_second": 440.817, + "eval_steps_per_second": 3.543, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 3.8936316962855533e-05, + "loss": 0.703, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8763858093126385, + "eval_f1": 0.8843194015175273, + "eval_loss": 0.8650215864181519, + "eval_precision": 0.898934459587499, + "eval_recall": 0.8763858093126385, + "eval_runtime": 7.5615, + "eval_samples_per_second": 477.153, + "eval_steps_per_second": 3.835, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 3.489996513979488e-05, + "loss": 0.704, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.098854687659187e-05, + "loss": 0.7051, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9154671361181246, + "eval_loss": 0.8027350306510925, + "eval_precision": 0.9144470011441219, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.7243, + "eval_samples_per_second": 467.095, + "eval_steps_per_second": 3.754, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 2.722484471801472e-05, + "loss": 0.7006, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9085794720681561, + "eval_loss": 0.8108441233634949, + "eval_precision": 0.9051980987215829, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.5273, + "eval_samples_per_second": 479.322, + "eval_steps_per_second": 3.853, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.3630780817909477e-05, + "loss": 0.7014, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.0227289250873705e-05, + "loss": 0.6996, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9144256749951416, + "eval_loss": 0.8024053573608398, + "eval_precision": 0.9137174062334704, + "eval_recall": 0.9190687361419069, + "eval_runtime": 7.6426, + "eval_samples_per_second": 472.09, + "eval_steps_per_second": 3.795, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.703419407912128e-05, + "loss": 0.6978, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.4070093884753273e-05, + "loss": 0.6971, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9115296103681854, + "eval_loss": 0.8094881176948547, + "eval_precision": 0.9097648233992257, + "eval_recall": 0.914079822616408, + "eval_runtime": 7.7715, + "eval_samples_per_second": 464.259, + "eval_steps_per_second": 3.732, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.1352253439990835e-05, + "loss": 0.6992, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9151759984048509, + "eval_loss": 0.8084082007408142, + "eval_precision": 0.9124710137757837, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.8386, + "eval_samples_per_second": 460.287, + "eval_steps_per_second": 3.7, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 8.896503146350498e-06, + "loss": 0.6933, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 6.717146828491193e-06, + "loss": 0.6977, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.913883819278621, + "eval_loss": 0.8043181300163269, + "eval_precision": 0.9110648007943729, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.539, + "eval_samples_per_second": 478.579, + "eval_steps_per_second": 3.847, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 4.826878419798857e-06, + "loss": 0.6968, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.2367080249841275e-06, + "loss": 0.6956, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9141792114977826, + "eval_loss": 0.8076222538948059, + "eval_precision": 0.9118666671185519, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.7678, + "eval_samples_per_second": 464.481, + "eval_steps_per_second": 3.733, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.955897790350779e-06, + "loss": 0.6956, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9166566794592999, + "eval_loss": 0.8003178834915161, + "eval_precision": 0.9136776462494478, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.5019, + "eval_samples_per_second": 480.946, + "eval_steps_per_second": 3.866, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 9.919079552663823e-07, + "loss": 0.6974, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 3.503533990652545e-07, + "loss": 0.6973, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9232261640798226, + "eval_f1": 0.9197497326060817, + "eval_loss": 0.7965475916862488, + "eval_precision": 0.9171916852421681, + "eval_recall": 0.9232261640798226, + "eval_runtime": 7.8508, + "eval_samples_per_second": 459.573, + "eval_steps_per_second": 3.694, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4369350581534651, + "learning_rate": 0.0001126983345346803, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-np97sxtx/checkpoint-616/training_args.bin b/run-np97sxtx/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..201fe36cb5b625c2910d40a3ab828d64ea1fd82a --- /dev/null +++ b/run-np97sxtx/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d21a0d9227dd0732ca2aeb58ac17918f12ea3681d2f445c2a08447a60ff89872 +size 4792 diff --git a/run-np97sxtx/checkpoint-630/model.safetensors b/run-np97sxtx/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96f321cf2f21cec60774909bbe3502bbec89212e --- /dev/null +++ b/run-np97sxtx/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ef413993418c1005460237808056c1c3c1116718e104d7d8f09f2293d896d5 +size 198025308 diff --git a/run-np97sxtx/checkpoint-630/optimizer.pt b/run-np97sxtx/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf9aeb28c94e4e58aa8d374a29b268fbb7e9d8c0 --- /dev/null +++ b/run-np97sxtx/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebf768512d7d502ac14ff53b0929ad04c36eb928ee2d4109d2add0292ca77cb6 +size 395900602 diff --git a/run-np97sxtx/checkpoint-630/rng_state.pth b/run-np97sxtx/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-np97sxtx/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-np97sxtx/checkpoint-630/scheduler.pt b/run-np97sxtx/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..41f22b448a65f99bd7bbee39483f37a28bf34566 --- /dev/null +++ b/run-np97sxtx/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:109ea8c432de61d0a1418b143db2e5946d7a2f697c8165dd91fe7b4455e2adfb +size 1064 diff --git a/run-np97sxtx/checkpoint-630/trainer_state.json b/run-np97sxtx/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b645bb1f379e15bfc75b1386f860d78bfcbcefde --- /dev/null +++ b/run-np97sxtx/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9197497326060817, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-np97sxtx/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.5421877357377304e-05, + "loss": 1.4827, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8281596452328159, + "eval_f1": 0.750566761552725, + "eval_loss": 1.0604153871536255, + "eval_precision": 0.6862681396176564, + "eval_recall": 0.8281596452328159, + "eval_runtime": 8.1608, + "eval_samples_per_second": 442.113, + "eval_steps_per_second": 3.554, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.084375471475461e-05, + "loss": 1.1445, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 4.626563207213191e-05, + "loss": 0.9462, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.871119733924612, + "eval_f1": 0.837812194732652, + "eval_loss": 0.9149242043495178, + "eval_precision": 0.8627963579904606, + "eval_recall": 0.871119733924612, + "eval_runtime": 7.9189, + "eval_samples_per_second": 455.62, + "eval_steps_per_second": 3.662, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 6.168750942950922e-05, + "loss": 0.874, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9082594235033259, + "eval_f1": 0.9013916307527915, + "eval_loss": 0.8876226544380188, + "eval_precision": 0.8997638438567944, + "eval_recall": 0.9082594235033259, + "eval_runtime": 7.6742, + "eval_samples_per_second": 470.144, + "eval_steps_per_second": 3.779, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 7.710938678688652e-05, + "loss": 0.8324, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 9.253126414426382e-05, + "loss": 0.8033, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9052814539518259, + "eval_loss": 0.805810809135437, + "eval_precision": 0.901509718560917, + "eval_recall": 0.9129711751662971, + "eval_runtime": 7.8908, + "eval_samples_per_second": 457.243, + "eval_steps_per_second": 3.675, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00010795314150164112, + "loss": 0.7996, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00011261966010178432, + "loss": 0.7747, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.9051934276749202, + "eval_loss": 0.8114230036735535, + "eval_precision": 0.9057106266216075, + "eval_recall": 0.907150776053215, + "eval_runtime": 7.6766, + "eval_samples_per_second": 470.0, + "eval_steps_per_second": 3.778, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011222877467887107, + "loss": 0.7776, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9118414344309512, + "eval_loss": 0.800033688545227, + "eval_precision": 0.9086836473847006, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.7928, + "eval_samples_per_second": 462.994, + "eval_steps_per_second": 3.721, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00011151241149525345, + "loss": 0.771, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00011047474309787043, + "loss": 0.7648, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9099530088245646, + "eval_loss": 0.8006107211112976, + "eval_precision": 0.9075033650423671, + "eval_recall": 0.9185144124168514, + "eval_runtime": 8.2369, + "eval_samples_per_second": 438.029, + "eval_steps_per_second": 3.521, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00010912181351605538, + "loss": 0.7602, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00010746150305733154, + "loss": 0.7513, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.9084232257941697, + "eval_loss": 0.8087606430053711, + "eval_precision": 0.9066563374349289, + "eval_recall": 0.9113082039911308, + "eval_runtime": 7.9724, + "eval_samples_per_second": 452.561, + "eval_steps_per_second": 3.638, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0001055034824075754, + "loss": 0.7447, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.908247685543376, + "eval_loss": 0.8052621483802795, + "eval_precision": 0.9060397273284067, + "eval_recall": 0.9113082039911308, + "eval_runtime": 7.6765, + "eval_samples_per_second": 470.004, + "eval_steps_per_second": 3.778, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00010325915630289765, + "loss": 0.7466, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00010074159710133152, + "loss": 0.7398, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9232261640798226, + "eval_f1": 0.9160020914097167, + "eval_loss": 0.7962931394577026, + "eval_precision": 0.9133160706080692, + "eval_recall": 0.9232261640798226, + "eval_runtime": 7.5107, + "eval_samples_per_second": 480.383, + "eval_steps_per_second": 3.861, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 9.796546864124887e-05, + "loss": 0.7322, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9021618625277162, + "eval_f1": 0.9038516552537947, + "eval_loss": 0.8248952627182007, + "eval_precision": 0.9087700718264768, + "eval_recall": 0.9021618625277162, + "eval_runtime": 7.8544, + "eval_samples_per_second": 459.363, + "eval_steps_per_second": 3.692, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 9.494694083e-05, + "loss": 0.7412, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 9.170359546026635e-05, + "loss": 0.7287, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9110349958727643, + "eval_loss": 0.8089420795440674, + "eval_precision": 0.9116017638012016, + "eval_recall": 0.9118625277161863, + "eval_runtime": 7.7437, + "eval_samples_per_second": 465.929, + "eval_steps_per_second": 3.745, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 8.825432380271054e-05, + "loss": 0.723, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 8.461921657140817e-05, + "loss": 0.7215, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9047829729440033, + "eval_loss": 0.8163162469863892, + "eval_precision": 0.9031137890314911, + "eval_recall": 0.9124168514412417, + "eval_runtime": 8.2907, + "eval_samples_per_second": 435.185, + "eval_steps_per_second": 3.498, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 8.081944690297147e-05, + "loss": 0.7175, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.8994529072861367, + "eval_loss": 0.8219234943389893, + "eval_precision": 0.9005721426438295, + "eval_recall": 0.9099223946784922, + "eval_runtime": 7.9364, + "eval_samples_per_second": 454.612, + "eval_steps_per_second": 3.654, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 7.687714703096771e-05, + "loss": 0.7189, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 7.281527937395709e-05, + "loss": 0.7163, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9039949715588392, + "eval_loss": 0.8146753311157227, + "eval_precision": 0.9062112838371414, + "eval_recall": 0.9115853658536586, + "eval_runtime": 7.5427, + "eval_samples_per_second": 478.343, + "eval_steps_per_second": 3.845, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 6.865750278801464e-05, + "loss": 0.715, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 6.442803476276478e-05, + "loss": 0.7182, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.9055299350831074, + "eval_loss": 0.8154621720314026, + "eval_precision": 0.9079904921572199, + "eval_recall": 0.9068736141906873, + "eval_runtime": 7.5328, + "eval_samples_per_second": 478.969, + "eval_steps_per_second": 3.85, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 6.0151510363584565e-05, + "loss": 0.7064, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9169724758288114, + "eval_loss": 0.8043838143348694, + "eval_precision": 0.9164616901241153, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.024, + "eval_samples_per_second": 449.649, + "eval_steps_per_second": 3.614, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 5.585283874158511e-05, + "loss": 0.7116, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 5.155705804714616e-05, + "loss": 0.7077, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9161401683691142, + "eval_loss": 0.8005483150482178, + "eval_precision": 0.9142362720862974, + "eval_recall": 0.9193458980044346, + "eval_runtime": 7.3733, + "eval_samples_per_second": 489.333, + "eval_steps_per_second": 3.933, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 4.728918959207841e-05, + "loss": 0.7059, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 4.307409210986412e-05, + "loss": 0.7044, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9106094208389661, + "eval_loss": 0.8073698878288269, + "eval_precision": 0.9069065100790249, + "eval_recall": 0.9154656319290465, + "eval_runtime": 8.1848, + "eval_samples_per_second": 440.817, + "eval_steps_per_second": 3.543, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 3.8936316962855533e-05, + "loss": 0.703, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8763858093126385, + "eval_f1": 0.8843194015175273, + "eval_loss": 0.8650215864181519, + "eval_precision": 0.898934459587499, + "eval_recall": 0.8763858093126385, + "eval_runtime": 7.5615, + "eval_samples_per_second": 477.153, + "eval_steps_per_second": 3.835, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 3.489996513979488e-05, + "loss": 0.704, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.098854687659187e-05, + "loss": 0.7051, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9154671361181246, + "eval_loss": 0.8027350306510925, + "eval_precision": 0.9144470011441219, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.7243, + "eval_samples_per_second": 467.095, + "eval_steps_per_second": 3.754, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 2.722484471801472e-05, + "loss": 0.7006, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9085794720681561, + "eval_loss": 0.8108441233634949, + "eval_precision": 0.9051980987215829, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.5273, + "eval_samples_per_second": 479.322, + "eval_steps_per_second": 3.853, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.3630780817909477e-05, + "loss": 0.7014, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.0227289250873705e-05, + "loss": 0.6996, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9144256749951416, + "eval_loss": 0.8024053573608398, + "eval_precision": 0.9137174062334704, + "eval_recall": 0.9190687361419069, + "eval_runtime": 7.6426, + "eval_samples_per_second": 472.09, + "eval_steps_per_second": 3.795, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.703419407912128e-05, + "loss": 0.6978, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.4070093884753273e-05, + "loss": 0.6971, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9115296103681854, + "eval_loss": 0.8094881176948547, + "eval_precision": 0.9097648233992257, + "eval_recall": 0.914079822616408, + "eval_runtime": 7.7715, + "eval_samples_per_second": 464.259, + "eval_steps_per_second": 3.732, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.1352253439990835e-05, + "loss": 0.6992, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9151759984048509, + "eval_loss": 0.8084082007408142, + "eval_precision": 0.9124710137757837, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.8386, + "eval_samples_per_second": 460.287, + "eval_steps_per_second": 3.7, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 8.896503146350498e-06, + "loss": 0.6933, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 6.717146828491193e-06, + "loss": 0.6977, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.913883819278621, + "eval_loss": 0.8043181300163269, + "eval_precision": 0.9110648007943729, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.539, + "eval_samples_per_second": 478.579, + "eval_steps_per_second": 3.847, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 4.826878419798857e-06, + "loss": 0.6968, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.2367080249841275e-06, + "loss": 0.6956, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9141792114977826, + "eval_loss": 0.8076222538948059, + "eval_precision": 0.9118666671185519, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.7678, + "eval_samples_per_second": 464.481, + "eval_steps_per_second": 3.733, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.955897790350779e-06, + "loss": 0.6956, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9166566794592999, + "eval_loss": 0.8003178834915161, + "eval_precision": 0.9136776462494478, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.5019, + "eval_samples_per_second": 480.946, + "eval_steps_per_second": 3.866, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 9.919079552663823e-07, + "loss": 0.6974, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 3.503533990652545e-07, + "loss": 0.6973, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9232261640798226, + "eval_f1": 0.9197497326060817, + "eval_loss": 0.7965475916862488, + "eval_precision": 0.9171916852421681, + "eval_recall": 0.9232261640798226, + "eval_runtime": 7.8508, + "eval_samples_per_second": 459.573, + "eval_steps_per_second": 3.694, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 3.4970936481698864e-08, + "loss": 0.6915, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9137325957604495, + "eval_loss": 0.8043078780174255, + "eval_precision": 0.9123175396382324, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.9023, + "eval_samples_per_second": 456.576, + "eval_steps_per_second": 3.67, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4369350581534651, + "learning_rate": 0.0001126983345346803, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-np97sxtx/checkpoint-630/training_args.bin b/run-np97sxtx/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..201fe36cb5b625c2910d40a3ab828d64ea1fd82a --- /dev/null +++ b/run-np97sxtx/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d21a0d9227dd0732ca2aeb58ac17918f12ea3681d2f445c2a08447a60ff89872 +size 4792 diff --git a/run-npxyz5ao/checkpoint-616/model.safetensors b/run-npxyz5ao/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f7512d16e1cefc2aa5124cebc0578d4be897a474 --- /dev/null +++ b/run-npxyz5ao/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27b2f63242cfb6ff99acc66064679f72c874e6d0cf23e9e7a1f7d43409ab9b80 +size 198025308 diff --git a/run-npxyz5ao/checkpoint-616/optimizer.pt b/run-npxyz5ao/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b363c5d6eab85f11240560fa03732cf57c37191 --- /dev/null +++ b/run-npxyz5ao/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14eeba3fe6f04dbbe9e393ff1182dcccf23634af7fe2c99d74728a20c643c98e +size 395900602 diff --git a/run-npxyz5ao/checkpoint-616/rng_state.pth b/run-npxyz5ao/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-npxyz5ao/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-npxyz5ao/checkpoint-616/scheduler.pt b/run-npxyz5ao/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..64581046d5ee566690604122a51313abdda87faa --- /dev/null +++ b/run-npxyz5ao/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9660869f65be457dc6ba622476eb504de8301405b85c0446414d326c0196bd51 +size 1064 diff --git a/run-npxyz5ao/checkpoint-616/trainer_state.json b/run-npxyz5ao/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..384574fb2d3bea3b120d70d03a057368bf131011 --- /dev/null +++ b/run-npxyz5ao/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9195419154766948, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-npxyz5ao/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.0392272037948466e-05, + "loss": 1.4152, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.976026177406311, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.9651, + "eval_samples_per_second": 452.976, + "eval_steps_per_second": 3.641, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 6.078454407589693e-05, + "loss": 1.0151, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 9.117681611384539e-05, + "loss": 0.8858, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8927383592017738, + "eval_f1": 0.882513620008743, + "eval_loss": 0.99123215675354, + "eval_precision": 0.8871355035720172, + "eval_recall": 0.8927383592017738, + "eval_runtime": 8.1849, + "eval_samples_per_second": 440.812, + "eval_steps_per_second": 3.543, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00012156908815179386, + "loss": 0.8223, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9051680129057004, + "eval_loss": 0.8349172472953796, + "eval_precision": 0.9046251483036148, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.3225, + "eval_samples_per_second": 492.73, + "eval_steps_per_second": 3.96, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001519613601897423, + "loss": 0.7972, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00018235363222769078, + "loss": 0.7794, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.9049773813322908, + "eval_loss": 0.8061544299125671, + "eval_precision": 0.904070184256196, + "eval_recall": 0.9107538802660754, + "eval_runtime": 7.9039, + "eval_samples_per_second": 456.482, + "eval_steps_per_second": 3.669, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00021274590426563923, + "loss": 0.7847, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00022194232695005734, + "loss": 0.7619, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8891352549889135, + "eval_f1": 0.8920776533181426, + "eval_loss": 0.8706955909729004, + "eval_precision": 0.9025956936682066, + "eval_recall": 0.8891352549889135, + "eval_runtime": 7.751, + "eval_samples_per_second": 465.49, + "eval_steps_per_second": 3.741, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00022117199945791418, + "loss": 0.7665, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8891352549889135, + "eval_f1": 0.8919486143143143, + "eval_loss": 0.8421220183372498, + "eval_precision": 0.901025700739741, + "eval_recall": 0.8891352549889135, + "eval_runtime": 8.1075, + "eval_samples_per_second": 445.022, + "eval_steps_per_second": 3.577, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00021976024495812477, + "loss": 0.7571, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00021771528639130276, + "loss": 0.7484, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9000609789948604, + "eval_loss": 0.8159117698669434, + "eval_precision": 0.9018812874954257, + "eval_recall": 0.9110310421286031, + "eval_runtime": 7.7094, + "eval_samples_per_second": 468.002, + "eval_steps_per_second": 3.762, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00021504903487432772, + "loss": 0.7432, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00021177702032255542, + "loss": 0.7343, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.8987440002900942, + "eval_loss": 0.826091468334198, + "eval_precision": 0.8989338494847853, + "eval_recall": 0.9024390243902439, + "eval_runtime": 7.6207, + "eval_samples_per_second": 473.446, + "eval_steps_per_second": 3.805, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0002079183009938842, + "loss": 0.7309, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8639135254988913, + "eval_f1": 0.8761782285024773, + "eval_loss": 0.8874382376670837, + "eval_precision": 0.9008838716897827, + "eval_recall": 0.8639135254988913, + "eval_runtime": 7.8242, + "eval_samples_per_second": 461.131, + "eval_steps_per_second": 3.706, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0002034953524815485, + "loss": 0.7301, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00019853393680221575, + "loss": 0.7257, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9000168366915469, + "eval_loss": 0.8197638392448425, + "eval_precision": 0.9019579362671689, + "eval_recall": 0.9121396895787139, + "eval_runtime": 8.0772, + "eval_samples_per_second": 446.692, + "eval_steps_per_second": 3.59, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00019306295234189897, + "loss": 0.722, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9096452328159645, + "eval_f1": 0.9072308417470947, + "eval_loss": 0.814572811126709, + "eval_precision": 0.9089044244232637, + "eval_recall": 0.9096452328159645, + "eval_runtime": 7.9287, + "eval_samples_per_second": 455.057, + "eval_steps_per_second": 3.658, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00018711426553369377, + "loss": 0.7275, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00018072252524775433, + "loss": 0.715, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.9030904555346652, + "eval_loss": 0.836148202419281, + "eval_precision": 0.9088566068254973, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.9295, + "eval_samples_per_second": 455.007, + "eval_steps_per_second": 3.657, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00017392496097461647, + "loss": 0.7087, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00016676116597737472, + "loss": 0.7081, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9038248337028825, + "eval_f1": 0.9039112217870526, + "eval_loss": 0.8326026201248169, + "eval_precision": 0.9057445459063664, + "eval_recall": 0.9038248337028825, + "eval_runtime": 7.9304, + "eval_samples_per_second": 454.957, + "eval_steps_per_second": 3.657, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00015927286667577058, + "loss": 0.7095, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9062765979785213, + "eval_loss": 0.8209455013275146, + "eval_precision": 0.9075510698180952, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.9414, + "eval_samples_per_second": 454.328, + "eval_steps_per_second": 3.652, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00015150367960544336, + "loss": 0.7079, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00014349885736796413, + "loss": 0.7056, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9064785060562826, + "eval_loss": 0.8175975680351257, + "eval_precision": 0.9031961399955839, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.8248, + "eval_samples_per_second": 461.096, + "eval_steps_per_second": 3.706, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00013530502505139944, + "loss": 0.7066, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.000126969908656656, + "loss": 0.7053, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9048356256160649, + "eval_loss": 0.8117685317993164, + "eval_precision": 0.9032352756612018, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.9414, + "eval_samples_per_second": 454.328, + "eval_steps_per_second": 3.652, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0001185420571114202, + "loss": 0.6971, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9077050997782705, + "eval_f1": 0.907815984453055, + "eval_loss": 0.8236454129219055, + "eval_precision": 0.9133825638649242, + "eval_recall": 0.9077050997782705, + "eval_runtime": 8.0519, + "eval_samples_per_second": 448.095, + "eval_steps_per_second": 3.602, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00011007055949085846, + "loss": 0.6999, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00010160475909216052, + "loss": 0.6983, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9153550118403305, + "eval_loss": 0.8057185411453247, + "eval_precision": 0.9186555380846426, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.082, + "eval_samples_per_second": 446.425, + "eval_steps_per_second": 3.588, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 9.319396602833494e-05, + "loss": 0.694, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 8.488717001529008e-05, + "loss": 0.6981, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9077050997782705, + "eval_f1": 0.9018230070541474, + "eval_loss": 0.8220182657241821, + "eval_precision": 0.8983594825723215, + "eval_recall": 0.9077050997782705, + "eval_runtime": 8.3941, + "eval_samples_per_second": 429.825, + "eval_steps_per_second": 3.455, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 7.6732755025108e-05, + "loss": 0.6915, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9076091774518796, + "eval_loss": 0.8206156492233276, + "eval_precision": 0.9070275330506284, + "eval_recall": 0.9088137472283814, + "eval_runtime": 7.6094, + "eval_samples_per_second": 474.147, + "eval_steps_per_second": 3.811, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 6.877821746754888e-05, + "loss": 0.6928, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 6.106988954127348e-05, + "loss": 0.6927, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9100213348779886, + "eval_loss": 0.8135704398155212, + "eval_precision": 0.9092391688275947, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.7615, + "eval_samples_per_second": 464.859, + "eval_steps_per_second": 3.736, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 5.365266936615831e-05, + "loss": 0.6907, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9095037169378648, + "eval_loss": 0.8118736743927002, + "eval_precision": 0.9068006386587595, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.0091, + "eval_samples_per_second": 450.488, + "eval_steps_per_second": 3.621, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 4.656975946858246e-05, + "loss": 0.6912, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 3.9862415142910424e-05, + "loss": 0.6902, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9166113285548587, + "eval_loss": 0.8000461459159851, + "eval_precision": 0.9139040257316274, + "eval_recall": 0.9207317073170732, + "eval_runtime": 7.7835, + "eval_samples_per_second": 463.545, + "eval_steps_per_second": 3.726, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 3.356970415487133e-05, + "loss": 0.6872, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 2.7728279186472497e-05, + "loss": 0.6864, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9142230737910019, + "eval_loss": 0.8041835427284241, + "eval_precision": 0.9128699571243266, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.9715, + "eval_samples_per_second": 452.613, + "eval_steps_per_second": 3.638, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 2.2372164347869842e-05, + "loss": 0.6889, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9212860310421286, + "eval_f1": 0.9178721774886978, + "eval_loss": 0.8053695559501648, + "eval_precision": 0.9158802987815214, + "eval_recall": 0.9212860310421286, + "eval_runtime": 7.6417, + "eval_samples_per_second": 472.146, + "eval_steps_per_second": 3.795, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.7532556999683685e-05, + "loss": 0.6844, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.3237646040071051e-05, + "loss": 0.6864, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9162923007357398, + "eval_loss": 0.8009617924690247, + "eval_precision": 0.9142824928975767, + "eval_recall": 0.91990022172949, + "eval_runtime": 8.1496, + "eval_samples_per_second": 442.721, + "eval_steps_per_second": 3.558, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 9.51244771496341e-06, + "loss": 0.6871, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 6.378659907814088e-06, + "loss": 0.6857, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9121210672819667, + "eval_loss": 0.8083218932151794, + "eval_precision": 0.910045125379327, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.8962, + "eval_samples_per_second": 456.927, + "eval_steps_per_second": 3.673, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 3.854535757563075e-06, + "loss": 0.6862, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.915996565803201, + "eval_loss": 0.8005344271659851, + "eval_precision": 0.9125197534439784, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.6145, + "eval_samples_per_second": 473.834, + "eval_steps_per_second": 3.809, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.9547773409467633e-06, + "loss": 0.687, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 6.904500384136095e-07, + "loss": 0.6875, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9195419154766948, + "eval_loss": 0.798733651638031, + "eval_precision": 0.9177391189264524, + "eval_recall": 0.9223946784922394, + "eval_runtime": 7.941, + "eval_samples_per_second": 454.349, + "eval_steps_per_second": 3.652, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.04924226621259686, + "learning_rate": 0.000222097372585008, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-npxyz5ao/checkpoint-616/training_args.bin b/run-npxyz5ao/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..237da3fd207e5c001a91b9ff244de2d755836581 --- /dev/null +++ b/run-npxyz5ao/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afa8f59a3c6f2d85d628b521f1aa2d09eb76c549d28927721b9e5006b6aef6b1 +size 4792 diff --git a/run-npxyz5ao/checkpoint-630/model.safetensors b/run-npxyz5ao/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2585d9bbe7856379c2de6b1b6217a0112ddc351f --- /dev/null +++ b/run-npxyz5ao/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea38c7b4d2cc942f37de87b32dcb3f1b33a1ddeb9e6a12a87de0b198c3e4d3f3 +size 198025308 diff --git a/run-npxyz5ao/checkpoint-630/optimizer.pt b/run-npxyz5ao/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac5fd7f7c247b1ae31e0cf316c10454fac056228 --- /dev/null +++ b/run-npxyz5ao/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bd0eff4523f522126b299d0950ed349be3a476d3eacc433a771be35cbdb5cc4 +size 395900602 diff --git a/run-npxyz5ao/checkpoint-630/rng_state.pth b/run-npxyz5ao/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-npxyz5ao/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-npxyz5ao/checkpoint-630/scheduler.pt b/run-npxyz5ao/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..523e600f512727a43732158c3d18377753fac064 --- /dev/null +++ b/run-npxyz5ao/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1adc4ca51a7d687147c75256f01d94754aabec12773f05478514060b11549462 +size 1064 diff --git a/run-npxyz5ao/checkpoint-630/trainer_state.json b/run-npxyz5ao/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f4aa6c9e98e78b25c701b8f94eea4d736b2ea91c --- /dev/null +++ b/run-npxyz5ao/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9195419154766948, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-npxyz5ao/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.0392272037948466e-05, + "loss": 1.4152, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.976026177406311, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.9651, + "eval_samples_per_second": 452.976, + "eval_steps_per_second": 3.641, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 6.078454407589693e-05, + "loss": 1.0151, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 9.117681611384539e-05, + "loss": 0.8858, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8927383592017738, + "eval_f1": 0.882513620008743, + "eval_loss": 0.99123215675354, + "eval_precision": 0.8871355035720172, + "eval_recall": 0.8927383592017738, + "eval_runtime": 8.1849, + "eval_samples_per_second": 440.812, + "eval_steps_per_second": 3.543, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00012156908815179386, + "loss": 0.8223, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9051680129057004, + "eval_loss": 0.8349172472953796, + "eval_precision": 0.9046251483036148, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.3225, + "eval_samples_per_second": 492.73, + "eval_steps_per_second": 3.96, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001519613601897423, + "loss": 0.7972, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00018235363222769078, + "loss": 0.7794, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.9049773813322908, + "eval_loss": 0.8061544299125671, + "eval_precision": 0.904070184256196, + "eval_recall": 0.9107538802660754, + "eval_runtime": 7.9039, + "eval_samples_per_second": 456.482, + "eval_steps_per_second": 3.669, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00021274590426563923, + "loss": 0.7847, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00022194232695005734, + "loss": 0.7619, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8891352549889135, + "eval_f1": 0.8920776533181426, + "eval_loss": 0.8706955909729004, + "eval_precision": 0.9025956936682066, + "eval_recall": 0.8891352549889135, + "eval_runtime": 7.751, + "eval_samples_per_second": 465.49, + "eval_steps_per_second": 3.741, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00022117199945791418, + "loss": 0.7665, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8891352549889135, + "eval_f1": 0.8919486143143143, + "eval_loss": 0.8421220183372498, + "eval_precision": 0.901025700739741, + "eval_recall": 0.8891352549889135, + "eval_runtime": 8.1075, + "eval_samples_per_second": 445.022, + "eval_steps_per_second": 3.577, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00021976024495812477, + "loss": 0.7571, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00021771528639130276, + "loss": 0.7484, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9000609789948604, + "eval_loss": 0.8159117698669434, + "eval_precision": 0.9018812874954257, + "eval_recall": 0.9110310421286031, + "eval_runtime": 7.7094, + "eval_samples_per_second": 468.002, + "eval_steps_per_second": 3.762, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00021504903487432772, + "loss": 0.7432, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00021177702032255542, + "loss": 0.7343, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.8987440002900942, + "eval_loss": 0.826091468334198, + "eval_precision": 0.8989338494847853, + "eval_recall": 0.9024390243902439, + "eval_runtime": 7.6207, + "eval_samples_per_second": 473.446, + "eval_steps_per_second": 3.805, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0002079183009938842, + "loss": 0.7309, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8639135254988913, + "eval_f1": 0.8761782285024773, + "eval_loss": 0.8874382376670837, + "eval_precision": 0.9008838716897827, + "eval_recall": 0.8639135254988913, + "eval_runtime": 7.8242, + "eval_samples_per_second": 461.131, + "eval_steps_per_second": 3.706, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0002034953524815485, + "loss": 0.7301, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00019853393680221575, + "loss": 0.7257, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9000168366915469, + "eval_loss": 0.8197638392448425, + "eval_precision": 0.9019579362671689, + "eval_recall": 0.9121396895787139, + "eval_runtime": 8.0772, + "eval_samples_per_second": 446.692, + "eval_steps_per_second": 3.59, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00019306295234189897, + "loss": 0.722, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9096452328159645, + "eval_f1": 0.9072308417470947, + "eval_loss": 0.814572811126709, + "eval_precision": 0.9089044244232637, + "eval_recall": 0.9096452328159645, + "eval_runtime": 7.9287, + "eval_samples_per_second": 455.057, + "eval_steps_per_second": 3.658, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00018711426553369377, + "loss": 0.7275, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00018072252524775433, + "loss": 0.715, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.9030904555346652, + "eval_loss": 0.836148202419281, + "eval_precision": 0.9088566068254973, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.9295, + "eval_samples_per_second": 455.007, + "eval_steps_per_second": 3.657, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00017392496097461647, + "loss": 0.7087, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00016676116597737472, + "loss": 0.7081, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9038248337028825, + "eval_f1": 0.9039112217870526, + "eval_loss": 0.8326026201248169, + "eval_precision": 0.9057445459063664, + "eval_recall": 0.9038248337028825, + "eval_runtime": 7.9304, + "eval_samples_per_second": 454.957, + "eval_steps_per_second": 3.657, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00015927286667577058, + "loss": 0.7095, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9062765979785213, + "eval_loss": 0.8209455013275146, + "eval_precision": 0.9075510698180952, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.9414, + "eval_samples_per_second": 454.328, + "eval_steps_per_second": 3.652, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00015150367960544336, + "loss": 0.7079, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00014349885736796413, + "loss": 0.7056, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9064785060562826, + "eval_loss": 0.8175975680351257, + "eval_precision": 0.9031961399955839, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.8248, + "eval_samples_per_second": 461.096, + "eval_steps_per_second": 3.706, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00013530502505139944, + "loss": 0.7066, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.000126969908656656, + "loss": 0.7053, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9048356256160649, + "eval_loss": 0.8117685317993164, + "eval_precision": 0.9032352756612018, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.9414, + "eval_samples_per_second": 454.328, + "eval_steps_per_second": 3.652, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0001185420571114202, + "loss": 0.6971, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9077050997782705, + "eval_f1": 0.907815984453055, + "eval_loss": 0.8236454129219055, + "eval_precision": 0.9133825638649242, + "eval_recall": 0.9077050997782705, + "eval_runtime": 8.0519, + "eval_samples_per_second": 448.095, + "eval_steps_per_second": 3.602, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00011007055949085846, + "loss": 0.6999, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00010160475909216052, + "loss": 0.6983, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9153550118403305, + "eval_loss": 0.8057185411453247, + "eval_precision": 0.9186555380846426, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.082, + "eval_samples_per_second": 446.425, + "eval_steps_per_second": 3.588, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 9.319396602833494e-05, + "loss": 0.694, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 8.488717001529008e-05, + "loss": 0.6981, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9077050997782705, + "eval_f1": 0.9018230070541474, + "eval_loss": 0.8220182657241821, + "eval_precision": 0.8983594825723215, + "eval_recall": 0.9077050997782705, + "eval_runtime": 8.3941, + "eval_samples_per_second": 429.825, + "eval_steps_per_second": 3.455, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 7.6732755025108e-05, + "loss": 0.6915, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9076091774518796, + "eval_loss": 0.8206156492233276, + "eval_precision": 0.9070275330506284, + "eval_recall": 0.9088137472283814, + "eval_runtime": 7.6094, + "eval_samples_per_second": 474.147, + "eval_steps_per_second": 3.811, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 6.877821746754888e-05, + "loss": 0.6928, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 6.106988954127348e-05, + "loss": 0.6927, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9100213348779886, + "eval_loss": 0.8135704398155212, + "eval_precision": 0.9092391688275947, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.7615, + "eval_samples_per_second": 464.859, + "eval_steps_per_second": 3.736, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 5.365266936615831e-05, + "loss": 0.6907, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9095037169378648, + "eval_loss": 0.8118736743927002, + "eval_precision": 0.9068006386587595, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.0091, + "eval_samples_per_second": 450.488, + "eval_steps_per_second": 3.621, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 4.656975946858246e-05, + "loss": 0.6912, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 3.9862415142910424e-05, + "loss": 0.6902, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9166113285548587, + "eval_loss": 0.8000461459159851, + "eval_precision": 0.9139040257316274, + "eval_recall": 0.9207317073170732, + "eval_runtime": 7.7835, + "eval_samples_per_second": 463.545, + "eval_steps_per_second": 3.726, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 3.356970415487133e-05, + "loss": 0.6872, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 2.7728279186472497e-05, + "loss": 0.6864, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9142230737910019, + "eval_loss": 0.8041835427284241, + "eval_precision": 0.9128699571243266, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.9715, + "eval_samples_per_second": 452.613, + "eval_steps_per_second": 3.638, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 2.2372164347869842e-05, + "loss": 0.6889, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9212860310421286, + "eval_f1": 0.9178721774886978, + "eval_loss": 0.8053695559501648, + "eval_precision": 0.9158802987815214, + "eval_recall": 0.9212860310421286, + "eval_runtime": 7.6417, + "eval_samples_per_second": 472.146, + "eval_steps_per_second": 3.795, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.7532556999683685e-05, + "loss": 0.6844, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.3237646040071051e-05, + "loss": 0.6864, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9162923007357398, + "eval_loss": 0.8009617924690247, + "eval_precision": 0.9142824928975767, + "eval_recall": 0.91990022172949, + "eval_runtime": 8.1496, + "eval_samples_per_second": 442.721, + "eval_steps_per_second": 3.558, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 9.51244771496341e-06, + "loss": 0.6871, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 6.378659907814088e-06, + "loss": 0.6857, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9121210672819667, + "eval_loss": 0.8083218932151794, + "eval_precision": 0.910045125379327, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.8962, + "eval_samples_per_second": 456.927, + "eval_steps_per_second": 3.673, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 3.854535757563075e-06, + "loss": 0.6862, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.915996565803201, + "eval_loss": 0.8005344271659851, + "eval_precision": 0.9125197534439784, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.6145, + "eval_samples_per_second": 473.834, + "eval_steps_per_second": 3.809, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.9547773409467633e-06, + "loss": 0.687, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 6.904500384136095e-07, + "loss": 0.6875, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9195419154766948, + "eval_loss": 0.798733651638031, + "eval_precision": 0.9177391189264524, + "eval_recall": 0.9223946784922394, + "eval_runtime": 7.941, + "eval_samples_per_second": 454.349, + "eval_steps_per_second": 3.652, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 6.891808243210925e-08, + "loss": 0.6814, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9146319184414617, + "eval_loss": 0.8078266382217407, + "eval_precision": 0.9130340766489339, + "eval_recall": 0.9174057649667405, + "eval_runtime": 7.3069, + "eval_samples_per_second": 493.777, + "eval_steps_per_second": 3.969, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.04924226621259686, + "learning_rate": 0.000222097372585008, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-npxyz5ao/checkpoint-630/training_args.bin b/run-npxyz5ao/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..237da3fd207e5c001a91b9ff244de2d755836581 --- /dev/null +++ b/run-npxyz5ao/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afa8f59a3c6f2d85d628b521f1aa2d09eb76c549d28927721b9e5006b6aef6b1 +size 4792 diff --git a/run-nrz6k86u/checkpoint-1260/model.safetensors b/run-nrz6k86u/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c0acd87d1cdb4d7392453b4cb29130e6d9d7891 --- /dev/null +++ b/run-nrz6k86u/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:116e540295f56eec80d84c3ec75efeb6d2a26d75af9aafbce7e7d3428575c7bc +size 198025308 diff --git a/run-nrz6k86u/checkpoint-1260/optimizer.pt b/run-nrz6k86u/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..693307cadab69764685a51f9bd5b6ebf732c386d --- /dev/null +++ b/run-nrz6k86u/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:832547eeefd044f4153931ae51047c4a629c5cf55b69b0f45f8fe2788af5b22d +size 395900602 diff --git a/run-nrz6k86u/checkpoint-1260/rng_state.pth b/run-nrz6k86u/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-nrz6k86u/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-nrz6k86u/checkpoint-1260/scheduler.pt b/run-nrz6k86u/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..85b4c4bcb6151b6cfda038731a71caeedc6d0432 --- /dev/null +++ b/run-nrz6k86u/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5716a3597ceb05686673aeb40125df5f9e91fa18cb280f2d71754fee1f3820d9 +size 1064 diff --git a/run-nrz6k86u/checkpoint-1260/trainer_state.json b/run-nrz6k86u/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c4ce84e903f74c3b6060a7850604bab1b5412be6 --- /dev/null +++ b/run-nrz6k86u/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9229490022172949, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-nrz6k86u/checkpoint-977", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.820608772091276e-06, + "loss": 1.4961, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8253880266075388, + "eval_loss": 1.2066949605941772, + "eval_runtime": 7.0445, + "eval_samples_per_second": 512.17, + "eval_steps_per_second": 8.091, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 9.641217544182553e-06, + "loss": 1.2532, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 1.446182631627383e-05, + "loss": 0.9915, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8514412416851441, + "eval_loss": 0.923211932182312, + "eval_runtime": 6.9494, + "eval_samples_per_second": 519.184, + "eval_steps_per_second": 8.202, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 1.9282435088365106e-05, + "loss": 0.8996, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8960643015521065, + "eval_loss": 0.8527778387069702, + "eval_runtime": 6.78, + "eval_samples_per_second": 532.156, + "eval_steps_per_second": 8.407, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 2.410304386045638e-05, + "loss": 0.8598, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 2.892365263254766e-05, + "loss": 0.8303, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.823358416557312, + "eval_runtime": 6.6289, + "eval_samples_per_second": 544.284, + "eval_steps_per_second": 8.599, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 3.3744261404638936e-05, + "loss": 0.8113, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 3.856487017673021e-05, + "loss": 0.7985, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8065429329872131, + "eval_runtime": 7.1365, + "eval_samples_per_second": 505.568, + "eval_steps_per_second": 7.987, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 4.170902402042605e-05, + "loss": 0.7947, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8107842206954956, + "eval_runtime": 6.6784, + "eval_samples_per_second": 540.25, + "eval_steps_per_second": 8.535, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 4.159920928871538e-05, + "loss": 0.7838, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 4.136028275263404e-05, + "loss": 0.7756, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.799553394317627, + "eval_runtime": 6.6463, + "eval_samples_per_second": 542.855, + "eval_steps_per_second": 8.576, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 4.0993731733179236e-05, + "loss": 0.7808, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 4.0501838015525304e-05, + "loss": 0.7668, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.7965279817581177, + "eval_runtime": 6.9707, + "eval_samples_per_second": 517.593, + "eval_steps_per_second": 8.177, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 3.988766364487937e-05, + "loss": 0.7581, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.7937821745872498, + "eval_runtime": 6.5064, + "eval_samples_per_second": 554.527, + "eval_steps_per_second": 8.761, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 3.915503186520754e-05, + "loss": 0.7662, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 3.83085033194882e-05, + "loss": 0.7551, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9215631929046563, + "eval_loss": 0.792614221572876, + "eval_runtime": 7.1332, + "eval_samples_per_second": 505.8, + "eval_steps_per_second": 7.991, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 3.7353347659646324e-05, + "loss": 0.7457, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.7945808172225952, + "eval_runtime": 6.8545, + "eval_samples_per_second": 526.369, + "eval_steps_per_second": 8.316, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 3.629551074289723e-05, + "loss": 0.7449, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 3.5141577618703046e-05, + "loss": 0.7429, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8017407059669495, + "eval_runtime": 6.8014, + "eval_samples_per_second": 530.478, + "eval_steps_per_second": 8.381, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 3.3898731536748556e-05, + "loss": 0.7418, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 3.2574709231112414e-05, + "loss": 0.7417, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9218403547671841, + "eval_loss": 0.7908085584640503, + "eval_runtime": 6.7688, + "eval_samples_per_second": 533.032, + "eval_steps_per_second": 8.421, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 3.117775275899032e-05, + "loss": 0.738, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.7968472838401794, + "eval_runtime": 6.8574, + "eval_samples_per_second": 526.146, + "eval_steps_per_second": 8.312, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 2.9716558193774776e-05, + "loss": 0.7411, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 2.8200221491877757e-05, + "loss": 0.7278, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8187916278839111, + "eval_runtime": 6.949, + "eval_samples_per_second": 519.212, + "eval_steps_per_second": 8.203, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 2.6638181870276022e-05, + "loss": 0.7312, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 2.504016304725465e-05, + "loss": 0.7238, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8031070232391357, + "eval_runtime": 6.5144, + "eval_samples_per_second": 553.848, + "eval_steps_per_second": 8.75, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 2.3416112712126047e-05, + "loss": 0.7278, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8030964732170105, + "eval_runtime": 6.3875, + "eval_samples_per_second": 564.85, + "eval_steps_per_second": 8.924, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 2.177614060072611e-05, + "loss": 0.7245, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 2.0130455562168594e-05, + "loss": 0.7218, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9223946784922394, + "eval_loss": 0.7923505306243896, + "eval_runtime": 6.8515, + "eval_samples_per_second": 526.602, + "eval_steps_per_second": 8.319, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 1.848930200861797e-05, + "loss": 0.719, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 1.6862896143681782e-05, + "loss": 0.724, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8035361170768738, + "eval_runtime": 6.773, + "eval_samples_per_second": 532.707, + "eval_steps_per_second": 8.416, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 1.5261362366401596e-05, + "loss": 0.7279, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.7960953712463379, + "eval_runtime": 6.9259, + "eval_samples_per_second": 520.944, + "eval_steps_per_second": 8.23, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 1.3694670246728764e-05, + "loss": 0.7193, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 1.2172572464813327e-05, + "loss": 0.7148, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9210088691796009, + "eval_loss": 0.8039548993110657, + "eval_runtime": 6.576, + "eval_samples_per_second": 548.66, + "eval_steps_per_second": 8.668, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 1.070454410043496e-05, + "loss": 0.7173, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8042116761207581, + "eval_runtime": 6.9935, + "eval_samples_per_second": 515.908, + "eval_steps_per_second": 8.15, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 9.29972365050013e-06, + "loss": 0.7177, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 7.96685614177215e-06, + "loss": 0.7154, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9229490022172949, + "eval_loss": 0.7969016432762146, + "eval_runtime": 6.7111, + "eval_samples_per_second": 537.62, + "eval_steps_per_second": 8.493, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 6.714238692958506e-06, + "loss": 0.7165, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 5.549668865031929e-06, + "loss": 0.7159, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9218403547671841, + "eval_loss": 0.7965477705001831, + "eval_runtime": 6.787, + "eval_samples_per_second": 531.601, + "eval_steps_per_second": 8.398, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 4.480396121305301e-06, + "loss": 0.7113, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.806559145450592, + "eval_runtime": 6.8173, + "eval_samples_per_second": 529.239, + "eval_steps_per_second": 8.361, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 3.51307669942212e-06, + "loss": 0.7091, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.6537321761849195e-06, + "loss": 0.7121, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8044928908348083, + "eval_runtime": 6.8311, + "eval_samples_per_second": 528.169, + "eval_steps_per_second": 8.344, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 1.9077119831562843e-06, + "loss": 0.712, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.2796601063732393e-06, + "loss": 0.7164, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.79651939868927, + "eval_runtime": 6.8218, + "eval_samples_per_second": 528.894, + "eval_steps_per_second": 8.356, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 7.73486177469835e-07, + "loss": 0.7121, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.7924294471740723, + "eval_runtime": 6.7027, + "eval_samples_per_second": 538.288, + "eval_steps_per_second": 8.504, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 3.923411361660687e-07, + "loss": 0.7097, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.3859761562447611e-07, + "loss": 0.7077, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.7947471141815186, + "eval_runtime": 6.5963, + "eval_samples_per_second": 546.97, + "eval_steps_per_second": 8.641, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 1.383517277580798e-08, + "loss": 0.7107, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9210088691796009, + "eval_loss": 0.7966174483299255, + "eval_runtime": 6.8526, + "eval_samples_per_second": 526.512, + "eval_steps_per_second": 8.318, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 4.171680668155912e-05, + "metric": "eval/loss", + "warmup_ratio": 0.17805180807901866 + } +} diff --git a/run-nrz6k86u/checkpoint-1260/training_args.bin b/run-nrz6k86u/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..20fe05df888102b38f75bf432c5ac57fdab20c6c --- /dev/null +++ b/run-nrz6k86u/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:904c5e53c2fb6f6f6dca5b85436178983a6f3fbafd95408cb4f011f7d8beed8c +size 4792 diff --git a/run-nrz6k86u/checkpoint-977/model.safetensors b/run-nrz6k86u/checkpoint-977/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b2bbf3a5ed17b0fe15d30c18d4c2936e4bc9332a --- /dev/null +++ b/run-nrz6k86u/checkpoint-977/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:792add46dd4a5d60d2ce3409bb33d291d26ed14eb246489465bf8dcc472c6a65 +size 198025308 diff --git a/run-nrz6k86u/checkpoint-977/optimizer.pt b/run-nrz6k86u/checkpoint-977/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..08698025482f57bb8b00cec7e58ca3d420b2912b --- /dev/null +++ b/run-nrz6k86u/checkpoint-977/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2be0f9e69f514d991df446750cb20bdbcf17432e28373743f4b6f70c574b317 +size 395900602 diff --git a/run-nrz6k86u/checkpoint-977/rng_state.pth b/run-nrz6k86u/checkpoint-977/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..64040e071cfd2b36efe8671a5c39c1cd45bfe765 --- /dev/null +++ b/run-nrz6k86u/checkpoint-977/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4edcf0c243ed806f9d2d5b881df10dc45598da934166b759eaaf7ff61a7accc3 +size 14244 diff --git a/run-nrz6k86u/checkpoint-977/scheduler.pt b/run-nrz6k86u/checkpoint-977/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a62dbe9182c3425bc7cff1c9a4b9d95c954e19f --- /dev/null +++ b/run-nrz6k86u/checkpoint-977/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91bcd4d8ea216bcaad54238e0df19ce017c3365eec894a822138457ee674fba9 +size 1064 diff --git a/run-nrz6k86u/checkpoint-977/trainer_state.json b/run-nrz6k86u/checkpoint-977/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bf30f8abd71ed52172ed429e7d8435189c11a625 --- /dev/null +++ b/run-nrz6k86u/checkpoint-977/trainer_state.json @@ -0,0 +1,456 @@ +{ + "best_metric": 0.9229490022172949, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-nrz6k86u/checkpoint-977", + "epoch": 22.988235294117647, + "eval_steps": 500, + "global_step": 977, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.820608772091276e-06, + "loss": 1.4961, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8253880266075388, + "eval_loss": 1.2066949605941772, + "eval_runtime": 7.0445, + "eval_samples_per_second": 512.17, + "eval_steps_per_second": 8.091, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 9.641217544182553e-06, + "loss": 1.2532, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 1.446182631627383e-05, + "loss": 0.9915, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8514412416851441, + "eval_loss": 0.923211932182312, + "eval_runtime": 6.9494, + "eval_samples_per_second": 519.184, + "eval_steps_per_second": 8.202, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 1.9282435088365106e-05, + "loss": 0.8996, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8960643015521065, + "eval_loss": 0.8527778387069702, + "eval_runtime": 6.78, + "eval_samples_per_second": 532.156, + "eval_steps_per_second": 8.407, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 2.410304386045638e-05, + "loss": 0.8598, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 2.892365263254766e-05, + "loss": 0.8303, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.823358416557312, + "eval_runtime": 6.6289, + "eval_samples_per_second": 544.284, + "eval_steps_per_second": 8.599, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 3.3744261404638936e-05, + "loss": 0.8113, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 3.856487017673021e-05, + "loss": 0.7985, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8065429329872131, + "eval_runtime": 7.1365, + "eval_samples_per_second": 505.568, + "eval_steps_per_second": 7.987, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 4.170902402042605e-05, + "loss": 0.7947, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8107842206954956, + "eval_runtime": 6.6784, + "eval_samples_per_second": 540.25, + "eval_steps_per_second": 8.535, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 4.159920928871538e-05, + "loss": 0.7838, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 4.136028275263404e-05, + "loss": 0.7756, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.799553394317627, + "eval_runtime": 6.6463, + "eval_samples_per_second": 542.855, + "eval_steps_per_second": 8.576, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 4.0993731733179236e-05, + "loss": 0.7808, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 4.0501838015525304e-05, + "loss": 0.7668, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.7965279817581177, + "eval_runtime": 6.9707, + "eval_samples_per_second": 517.593, + "eval_steps_per_second": 8.177, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 3.988766364487937e-05, + "loss": 0.7581, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.7937821745872498, + "eval_runtime": 6.5064, + "eval_samples_per_second": 554.527, + "eval_steps_per_second": 8.761, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 3.915503186520754e-05, + "loss": 0.7662, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 3.83085033194882e-05, + "loss": 0.7551, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9215631929046563, + "eval_loss": 0.792614221572876, + "eval_runtime": 7.1332, + "eval_samples_per_second": 505.8, + "eval_steps_per_second": 7.991, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 3.7353347659646324e-05, + "loss": 0.7457, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.7945808172225952, + "eval_runtime": 6.8545, + "eval_samples_per_second": 526.369, + "eval_steps_per_second": 8.316, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 3.629551074289723e-05, + "loss": 0.7449, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 3.5141577618703046e-05, + "loss": 0.7429, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8017407059669495, + "eval_runtime": 6.8014, + "eval_samples_per_second": 530.478, + "eval_steps_per_second": 8.381, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 3.3898731536748556e-05, + "loss": 0.7418, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 3.2574709231112414e-05, + "loss": 0.7417, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9218403547671841, + "eval_loss": 0.7908085584640503, + "eval_runtime": 6.7688, + "eval_samples_per_second": 533.032, + "eval_steps_per_second": 8.421, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 3.117775275899032e-05, + "loss": 0.738, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.7968472838401794, + "eval_runtime": 6.8574, + "eval_samples_per_second": 526.146, + "eval_steps_per_second": 8.312, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 2.9716558193774776e-05, + "loss": 0.7411, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 2.8200221491877757e-05, + "loss": 0.7278, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8187916278839111, + "eval_runtime": 6.949, + "eval_samples_per_second": 519.212, + "eval_steps_per_second": 8.203, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 2.6638181870276022e-05, + "loss": 0.7312, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 2.504016304725465e-05, + "loss": 0.7238, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8031070232391357, + "eval_runtime": 6.5144, + "eval_samples_per_second": 553.848, + "eval_steps_per_second": 8.75, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 2.3416112712126047e-05, + "loss": 0.7278, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8030964732170105, + "eval_runtime": 6.3875, + "eval_samples_per_second": 564.85, + "eval_steps_per_second": 8.924, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 2.177614060072611e-05, + "loss": 0.7245, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 2.0130455562168594e-05, + "loss": 0.7218, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9223946784922394, + "eval_loss": 0.7923505306243896, + "eval_runtime": 6.8515, + "eval_samples_per_second": 526.602, + "eval_steps_per_second": 8.319, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 1.848930200861797e-05, + "loss": 0.719, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 1.6862896143681782e-05, + "loss": 0.724, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8035361170768738, + "eval_runtime": 6.773, + "eval_samples_per_second": 532.707, + "eval_steps_per_second": 8.416, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 1.5261362366401596e-05, + "loss": 0.7279, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.7960953712463379, + "eval_runtime": 6.9259, + "eval_samples_per_second": 520.944, + "eval_steps_per_second": 8.23, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 1.3694670246728764e-05, + "loss": 0.7193, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 1.2172572464813327e-05, + "loss": 0.7148, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9210088691796009, + "eval_loss": 0.8039548993110657, + "eval_runtime": 6.576, + "eval_samples_per_second": 548.66, + "eval_steps_per_second": 8.668, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 1.070454410043496e-05, + "loss": 0.7173, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8042116761207581, + "eval_runtime": 6.9935, + "eval_samples_per_second": 515.908, + "eval_steps_per_second": 8.15, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 9.29972365050013e-06, + "loss": 0.7177, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 7.96685614177215e-06, + "loss": 0.7154, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9229490022172949, + "eval_loss": 0.7969016432762146, + "eval_runtime": 6.7111, + "eval_samples_per_second": 537.62, + "eval_steps_per_second": 8.493, + "step": 977 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 4.171680668155912e-05, + "metric": "eval/loss", + "warmup_ratio": 0.17805180807901866 + } +} diff --git a/run-nrz6k86u/checkpoint-977/training_args.bin b/run-nrz6k86u/checkpoint-977/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..20fe05df888102b38f75bf432c5ac57fdab20c6c --- /dev/null +++ b/run-nrz6k86u/checkpoint-977/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:904c5e53c2fb6f6f6dca5b85436178983a6f3fbafd95408cb4f011f7d8beed8c +size 4792 diff --git a/run-o3o5aal7/checkpoint-1190/model.safetensors b/run-o3o5aal7/checkpoint-1190/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d7644471b2d173880c1e8fdda91f72842a975688 --- /dev/null +++ b/run-o3o5aal7/checkpoint-1190/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75cae3398fad21725cb39cedb2e168aee4add06d5817cfc6d8f2176dcde71cc6 +size 198025308 diff --git a/run-o3o5aal7/checkpoint-1190/optimizer.pt b/run-o3o5aal7/checkpoint-1190/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea9556e447ef3d3d0fa379b77a97320ba3d8fe94 --- /dev/null +++ b/run-o3o5aal7/checkpoint-1190/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d44a2d57663d92c8decb5f9e4b559f4658365847a814bbeaf4ffaff54a2147ca +size 395900602 diff --git a/run-o3o5aal7/checkpoint-1190/rng_state.pth b/run-o3o5aal7/checkpoint-1190/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fa10329c52a02715f873c9a50812feb7d32c8cd3 --- /dev/null +++ b/run-o3o5aal7/checkpoint-1190/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43f5febab37757cc5268c77056c937c9c526090d892464a785cf2004d48e5d85 +size 14244 diff --git a/run-o3o5aal7/checkpoint-1190/scheduler.pt b/run-o3o5aal7/checkpoint-1190/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..710d3f805a56ee0d6168156f695f7b40325d2335 --- /dev/null +++ b/run-o3o5aal7/checkpoint-1190/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ef537aaf62f64cd98cfeda6f65e28fe406fc4f836c42ccc83eb90c3dc900a05 +size 1064 diff --git a/run-o3o5aal7/checkpoint-1190/trainer_state.json b/run-o3o5aal7/checkpoint-1190/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8ce0f3890d50d2a047ebd04e8749e06f3ae459bf --- /dev/null +++ b/run-o3o5aal7/checkpoint-1190/trainer_state.json @@ -0,0 +1,549 @@ +{ + "best_metric": 0.9207317073170732, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-o3o5aal7/checkpoint-1190", + "epoch": 28.0, + "eval_steps": 500, + "global_step": 1190, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.644669698256437e-05, + "loss": 1.3352, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8553215077605322, + "eval_loss": 0.9271471500396729, + "eval_runtime": 6.9052, + "eval_samples_per_second": 522.504, + "eval_steps_per_second": 8.255, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 5.289339396512874e-05, + "loss": 0.9358, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 7.934009094769311e-05, + "loss": 0.8458, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8174324035644531, + "eval_runtime": 6.9616, + "eval_samples_per_second": 518.275, + "eval_steps_per_second": 8.188, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010578678793025748, + "loss": 0.8101, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9032705099778271, + "eval_loss": 0.8278202414512634, + "eval_runtime": 6.7137, + "eval_samples_per_second": 537.409, + "eval_steps_per_second": 8.49, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00013223348491282186, + "loss": 0.7965, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00015868018189538622, + "loss": 0.7823, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8952328159645233, + "eval_loss": 0.8400340676307678, + "eval_runtime": 6.7977, + "eval_samples_per_second": 530.767, + "eval_steps_per_second": 8.385, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0001851268788779506, + "loss": 0.7813, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00021157357586051496, + "loss": 0.7774, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8120608329772949, + "eval_runtime": 7.0946, + "eval_samples_per_second": 508.558, + "eval_steps_per_second": 8.034, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00023802027284307932, + "loss": 0.7747, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9060421286031042, + "eval_loss": 0.8172792792320251, + "eval_runtime": 6.9766, + "eval_samples_per_second": 517.157, + "eval_steps_per_second": 8.17, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0002644669698256437, + "loss": 0.7672, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00029091366680820805, + "loss": 0.7656, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8999445676274944, + "eval_loss": 0.8440818190574646, + "eval_runtime": 6.8403, + "eval_samples_per_second": 527.463, + "eval_steps_per_second": 8.333, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00031736036379077245, + "loss": 0.7678, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0003438070607733368, + "loss": 0.7657, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8794345898004434, + "eval_loss": 0.8826764822006226, + "eval_runtime": 7.0141, + "eval_samples_per_second": 514.394, + "eval_steps_per_second": 8.127, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0003702537577559012, + "loss": 0.7542, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8508869179600886, + "eval_loss": 0.8907228708267212, + "eval_runtime": 6.9002, + "eval_samples_per_second": 522.88, + "eval_steps_per_second": 8.261, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0003967004547384655, + "loss": 0.7695, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0004231471517210299, + "loss": 0.7687, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8894124168514412, + "eval_loss": 0.848678708076477, + "eval_runtime": 6.5809, + "eval_samples_per_second": 548.251, + "eval_steps_per_second": 8.661, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0004221571088986189, + "loss": 0.7665, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.8273284435272217, + "eval_runtime": 6.9266, + "eval_samples_per_second": 520.888, + "eval_steps_per_second": 8.229, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0004191962460949851, + "loss": 0.762, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00041429227358495804, + "loss": 0.7542, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8919068736141907, + "eval_loss": 0.8464815616607666, + "eval_runtime": 6.6167, + "eval_samples_per_second": 545.29, + "eval_steps_per_second": 8.615, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00040749108691825466, + "loss": 0.7527, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0003988563373898524, + "loss": 0.7505, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.8313099145889282, + "eval_runtime": 6.6074, + "eval_samples_per_second": 546.054, + "eval_steps_per_second": 8.627, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0003884688363369811, + "loss": 0.7403, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.7621951219512195, + "eval_loss": 1.0304741859436035, + "eval_runtime": 6.834, + "eval_samples_per_second": 527.946, + "eval_steps_per_second": 8.341, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0003764257988378315, + "loss": 0.7482, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00036283993389008977, + "loss": 0.7414, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9021618625277162, + "eval_loss": 0.8370293378829956, + "eval_runtime": 6.4695, + "eval_samples_per_second": 557.692, + "eval_steps_per_second": 8.811, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0003478383895841832, + "loss": 0.7432, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0003315615631432028, + "loss": 0.7361, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8810975609756098, + "eval_loss": 0.8724713325500488, + "eval_runtime": 6.8459, + "eval_samples_per_second": 527.03, + "eval_steps_per_second": 8.326, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00031416178696616387, + "loss": 0.7404, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8985587583148559, + "eval_loss": 0.838969349861145, + "eval_runtime": 6.8331, + "eval_samples_per_second": 528.022, + "eval_steps_per_second": 8.342, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00029580190297172815, + "loss": 0.7262, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00027665373858489476, + "loss": 0.7257, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.8204749226570129, + "eval_runtime": 6.6643, + "eval_samples_per_second": 541.393, + "eval_steps_per_second": 8.553, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.000256896498629673, + "loss": 0.7145, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00023671508817777368, + "loss": 0.7206, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.827319324016571, + "eval_runtime": 6.8939, + "eval_samples_per_second": 523.362, + "eval_steps_per_second": 8.268, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00021629838204953084, + "loss": 0.7222, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8243474364280701, + "eval_runtime": 6.5552, + "eval_samples_per_second": 550.399, + "eval_steps_per_second": 8.695, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00019583745716253518, + "loss": 0.7099, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00017552380427116743, + "loss": 0.7085, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9032705099778271, + "eval_loss": 0.8336615562438965, + "eval_runtime": 6.4616, + "eval_samples_per_second": 558.372, + "eval_steps_per_second": 8.821, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00015554753583309365, + "loss": 0.7054, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8162431716918945, + "eval_runtime": 6.8355, + "eval_samples_per_second": 527.833, + "eval_steps_per_second": 8.339, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0001360956067750362, + "loss": 0.7077, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00011735006480940568, + "loss": 0.7033, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8091212511062622, + "eval_runtime": 6.8648, + "eval_samples_per_second": 525.581, + "eval_steps_per_second": 8.303, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 9.948634667682333e-05, + "loss": 0.7013, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 8.267163625974527e-05, + "loss": 0.6957, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8194777369499207, + "eval_runtime": 6.7802, + "eval_samples_per_second": 532.141, + "eval_steps_per_second": 8.407, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 6.706329993335927e-05, + "loss": 0.6988, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8109309673309326, + "eval_runtime": 6.9083, + "eval_samples_per_second": 522.272, + "eval_steps_per_second": 8.251, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 5.2807413797073014e-05, + "loss": 0.6942, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 4.003739657001468e-05, + "loss": 0.6921, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8112218976020813, + "eval_runtime": 6.5687, + "eval_samples_per_second": 549.268, + "eval_steps_per_second": 8.677, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 2.8872760945074097e-05, + "loss": 0.691, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.941799508737634e-05, + "loss": 0.6949, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.807709813117981, + "eval_runtime": 6.6866, + "eval_samples_per_second": 539.585, + "eval_steps_per_second": 8.524, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.1761584745077091e-05, + "loss": 0.6926, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9207317073170732, + "eval_loss": 0.8071979284286499, + "eval_runtime": 7.0189, + "eval_samples_per_second": 514.044, + "eval_steps_per_second": 8.121, + "step": 1190 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0004231471517210299, + "metric": "eval/loss", + "warmup_ratio": 0.3300899871048083 + } +} diff --git a/run-o3o5aal7/checkpoint-1190/training_args.bin b/run-o3o5aal7/checkpoint-1190/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b53d1f3af458cb1e76fd2d63fd5c619715f62f0f --- /dev/null +++ b/run-o3o5aal7/checkpoint-1190/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1a103d292dc6fbdd73a5f7b26c94d65013fddf031a73fa2bea26d23b40501a8 +size 4792 diff --git a/run-o3o5aal7/checkpoint-1260/model.safetensors b/run-o3o5aal7/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4c585ed9cd92d18689050f285285823969c9fde --- /dev/null +++ b/run-o3o5aal7/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edb57b765d55fcaf1e2b7e9ee38f52c7fe1fd35e4b2d69728b05c0cebadf139f +size 198025308 diff --git a/run-o3o5aal7/checkpoint-1260/optimizer.pt b/run-o3o5aal7/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..73bb591df7157d4c62bab96b94d36cad3765be8a --- /dev/null +++ b/run-o3o5aal7/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89add2917ea1b1ffcc41cbd5a5d5e64795792f04d7f5b4e81346d88a34423fe7 +size 395900602 diff --git a/run-o3o5aal7/checkpoint-1260/rng_state.pth b/run-o3o5aal7/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-o3o5aal7/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-o3o5aal7/checkpoint-1260/scheduler.pt b/run-o3o5aal7/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..87000c828eef706066d7a3b19b633bce7799c1ce --- /dev/null +++ b/run-o3o5aal7/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee30777f1a880c492b7bfd36f599ac7e3a6fdf3f8fcdf14f5173120b69cf397f +size 1064 diff --git a/run-o3o5aal7/checkpoint-1260/trainer_state.json b/run-o3o5aal7/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4c196cfc28211e43e6bafff2ecacefc62fec169e --- /dev/null +++ b/run-o3o5aal7/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9207317073170732, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-o3o5aal7/checkpoint-1190", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.644669698256437e-05, + "loss": 1.3352, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8553215077605322, + "eval_loss": 0.9271471500396729, + "eval_runtime": 6.9052, + "eval_samples_per_second": 522.504, + "eval_steps_per_second": 8.255, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 5.289339396512874e-05, + "loss": 0.9358, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 7.934009094769311e-05, + "loss": 0.8458, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8174324035644531, + "eval_runtime": 6.9616, + "eval_samples_per_second": 518.275, + "eval_steps_per_second": 8.188, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010578678793025748, + "loss": 0.8101, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9032705099778271, + "eval_loss": 0.8278202414512634, + "eval_runtime": 6.7137, + "eval_samples_per_second": 537.409, + "eval_steps_per_second": 8.49, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00013223348491282186, + "loss": 0.7965, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00015868018189538622, + "loss": 0.7823, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8952328159645233, + "eval_loss": 0.8400340676307678, + "eval_runtime": 6.7977, + "eval_samples_per_second": 530.767, + "eval_steps_per_second": 8.385, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0001851268788779506, + "loss": 0.7813, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00021157357586051496, + "loss": 0.7774, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8120608329772949, + "eval_runtime": 7.0946, + "eval_samples_per_second": 508.558, + "eval_steps_per_second": 8.034, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00023802027284307932, + "loss": 0.7747, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9060421286031042, + "eval_loss": 0.8172792792320251, + "eval_runtime": 6.9766, + "eval_samples_per_second": 517.157, + "eval_steps_per_second": 8.17, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0002644669698256437, + "loss": 0.7672, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00029091366680820805, + "loss": 0.7656, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8999445676274944, + "eval_loss": 0.8440818190574646, + "eval_runtime": 6.8403, + "eval_samples_per_second": 527.463, + "eval_steps_per_second": 8.333, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00031736036379077245, + "loss": 0.7678, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0003438070607733368, + "loss": 0.7657, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8794345898004434, + "eval_loss": 0.8826764822006226, + "eval_runtime": 7.0141, + "eval_samples_per_second": 514.394, + "eval_steps_per_second": 8.127, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0003702537577559012, + "loss": 0.7542, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8508869179600886, + "eval_loss": 0.8907228708267212, + "eval_runtime": 6.9002, + "eval_samples_per_second": 522.88, + "eval_steps_per_second": 8.261, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0003967004547384655, + "loss": 0.7695, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0004231471517210299, + "loss": 0.7687, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8894124168514412, + "eval_loss": 0.848678708076477, + "eval_runtime": 6.5809, + "eval_samples_per_second": 548.251, + "eval_steps_per_second": 8.661, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0004221571088986189, + "loss": 0.7665, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.8273284435272217, + "eval_runtime": 6.9266, + "eval_samples_per_second": 520.888, + "eval_steps_per_second": 8.229, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0004191962460949851, + "loss": 0.762, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00041429227358495804, + "loss": 0.7542, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8919068736141907, + "eval_loss": 0.8464815616607666, + "eval_runtime": 6.6167, + "eval_samples_per_second": 545.29, + "eval_steps_per_second": 8.615, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00040749108691825466, + "loss": 0.7527, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0003988563373898524, + "loss": 0.7505, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.8313099145889282, + "eval_runtime": 6.6074, + "eval_samples_per_second": 546.054, + "eval_steps_per_second": 8.627, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0003884688363369811, + "loss": 0.7403, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.7621951219512195, + "eval_loss": 1.0304741859436035, + "eval_runtime": 6.834, + "eval_samples_per_second": 527.946, + "eval_steps_per_second": 8.341, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0003764257988378315, + "loss": 0.7482, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00036283993389008977, + "loss": 0.7414, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9021618625277162, + "eval_loss": 0.8370293378829956, + "eval_runtime": 6.4695, + "eval_samples_per_second": 557.692, + "eval_steps_per_second": 8.811, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0003478383895841832, + "loss": 0.7432, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0003315615631432028, + "loss": 0.7361, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8810975609756098, + "eval_loss": 0.8724713325500488, + "eval_runtime": 6.8459, + "eval_samples_per_second": 527.03, + "eval_steps_per_second": 8.326, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00031416178696616387, + "loss": 0.7404, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8985587583148559, + "eval_loss": 0.838969349861145, + "eval_runtime": 6.8331, + "eval_samples_per_second": 528.022, + "eval_steps_per_second": 8.342, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00029580190297172815, + "loss": 0.7262, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00027665373858489476, + "loss": 0.7257, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.8204749226570129, + "eval_runtime": 6.6643, + "eval_samples_per_second": 541.393, + "eval_steps_per_second": 8.553, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.000256896498629673, + "loss": 0.7145, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00023671508817777368, + "loss": 0.7206, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.827319324016571, + "eval_runtime": 6.8939, + "eval_samples_per_second": 523.362, + "eval_steps_per_second": 8.268, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00021629838204953084, + "loss": 0.7222, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8243474364280701, + "eval_runtime": 6.5552, + "eval_samples_per_second": 550.399, + "eval_steps_per_second": 8.695, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00019583745716253518, + "loss": 0.7099, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00017552380427116743, + "loss": 0.7085, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9032705099778271, + "eval_loss": 0.8336615562438965, + "eval_runtime": 6.4616, + "eval_samples_per_second": 558.372, + "eval_steps_per_second": 8.821, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00015554753583309365, + "loss": 0.7054, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8162431716918945, + "eval_runtime": 6.8355, + "eval_samples_per_second": 527.833, + "eval_steps_per_second": 8.339, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0001360956067750362, + "loss": 0.7077, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00011735006480940568, + "loss": 0.7033, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8091212511062622, + "eval_runtime": 6.8648, + "eval_samples_per_second": 525.581, + "eval_steps_per_second": 8.303, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 9.948634667682333e-05, + "loss": 0.7013, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 8.267163625974527e-05, + "loss": 0.6957, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8194777369499207, + "eval_runtime": 6.7802, + "eval_samples_per_second": 532.141, + "eval_steps_per_second": 8.407, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 6.706329993335927e-05, + "loss": 0.6988, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8109309673309326, + "eval_runtime": 6.9083, + "eval_samples_per_second": 522.272, + "eval_steps_per_second": 8.251, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 5.2807413797073014e-05, + "loss": 0.6942, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 4.003739657001468e-05, + "loss": 0.6921, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8112218976020813, + "eval_runtime": 6.5687, + "eval_samples_per_second": 549.268, + "eval_steps_per_second": 8.677, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 2.8872760945074097e-05, + "loss": 0.691, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.941799508737634e-05, + "loss": 0.6949, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.807709813117981, + "eval_runtime": 6.6866, + "eval_samples_per_second": 539.585, + "eval_steps_per_second": 8.524, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.1761584745077091e-05, + "loss": 0.6926, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9207317073170732, + "eval_loss": 0.8071979284286499, + "eval_runtime": 7.0189, + "eval_samples_per_second": 514.044, + "eval_steps_per_second": 8.121, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 5.975185124399433e-06, + "loss": 0.6863, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 2.1129502792109854e-06, + "loss": 0.6893, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8099397420883179, + "eval_runtime": 6.4163, + "eval_samples_per_second": 562.318, + "eval_steps_per_second": 8.884, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 2.1102629128393648e-07, + "loss": 0.6912, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8072518110275269, + "eval_runtime": 6.8458, + "eval_samples_per_second": 527.037, + "eval_steps_per_second": 8.326, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0004231471517210299, + "metric": "eval/loss", + "warmup_ratio": 0.3300899871048083 + } +} diff --git a/run-o3o5aal7/checkpoint-1260/training_args.bin b/run-o3o5aal7/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b53d1f3af458cb1e76fd2d63fd5c619715f62f0f --- /dev/null +++ b/run-o3o5aal7/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1a103d292dc6fbdd73a5f7b26c94d65013fddf031a73fa2bea26d23b40501a8 +size 4792 diff --git a/run-o83l1i0t/checkpoint-1232/model.safetensors b/run-o83l1i0t/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..490fa393edb441535f83d673f5bb6ac39236d5ad --- /dev/null +++ b/run-o83l1i0t/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eda96f8b0acd71db0a34f5f9a77a52b1805231ec346ae864c5c4081e48bbf6e +size 198025308 diff --git a/run-o83l1i0t/checkpoint-1232/optimizer.pt b/run-o83l1i0t/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbb497ba950fad361a8aba3b94a10f6b1a7786c5 --- /dev/null +++ b/run-o83l1i0t/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:732d8280fc82795b1cfc5b36198158786df4552dc899722d2f425363f3bfe224 +size 395900602 diff --git a/run-o83l1i0t/checkpoint-1232/rng_state.pth b/run-o83l1i0t/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-o83l1i0t/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-o83l1i0t/checkpoint-1232/scheduler.pt b/run-o83l1i0t/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ad1ed369033958ea574207697c937157bbe7e01 --- /dev/null +++ b/run-o83l1i0t/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dca4c64dac707c5d1bc3abb8440c9ff915f37aab7c40569962b26a79ef4a18b +size 1064 diff --git a/run-o83l1i0t/checkpoint-1232/trainer_state.json b/run-o83l1i0t/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..57f8dfe33387a21d927100ed415cf760caa99489 --- /dev/null +++ b/run-o83l1i0t/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.917960088691796, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-o83l1i0t/checkpoint-1232", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.6359635957704034e-05, + "loss": 1.2622, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8841463414634146, + "eval_loss": 0.8997307419776917, + "eval_runtime": 6.8432, + "eval_samples_per_second": 527.239, + "eval_steps_per_second": 8.329, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 9.271927191540807e-05, + "loss": 0.8926, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00013907890787311208, + "loss": 0.8184, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9035476718403548, + "eval_loss": 0.8259819149971008, + "eval_runtime": 6.7246, + "eval_samples_per_second": 536.534, + "eval_steps_per_second": 8.476, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00018543854383081614, + "loss": 0.8042, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8235899806022644, + "eval_runtime": 6.7893, + "eval_samples_per_second": 531.428, + "eval_steps_per_second": 8.396, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00023179817978852014, + "loss": 0.7955, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00027815781574622416, + "loss": 0.786, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8669623059866962, + "eval_loss": 0.8887752294540405, + "eval_runtime": 6.7087, + "eval_samples_per_second": 537.806, + "eval_steps_per_second": 8.496, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003245174517039282, + "loss": 0.779, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003708770876616323, + "loss": 0.7805, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.899390243902439, + "eval_loss": 0.8318032026290894, + "eval_runtime": 6.9016, + "eval_samples_per_second": 522.775, + "eval_steps_per_second": 8.259, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00041723672361933625, + "loss": 0.782, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8733370288248337, + "eval_loss": 0.8858808875083923, + "eval_runtime": 6.8227, + "eval_samples_per_second": 528.82, + "eval_steps_per_second": 8.354, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0004635963595770403, + "loss": 0.7781, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0005099559955347444, + "loss": 0.7806, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9004988913525499, + "eval_loss": 0.8367480039596558, + "eval_runtime": 6.6087, + "eval_samples_per_second": 545.946, + "eval_steps_per_second": 8.625, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0005563156314924483, + "loss": 0.7839, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0006026752674501524, + "loss": 0.7914, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8683481152993349, + "eval_loss": 0.8877268433570862, + "eval_runtime": 6.6358, + "eval_samples_per_second": 543.721, + "eval_steps_per_second": 8.59, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0006490349034078564, + "loss": 0.7889, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8572616407982262, + "eval_loss": 0.9073005318641663, + "eval_runtime": 6.7268, + "eval_samples_per_second": 536.363, + "eval_steps_per_second": 8.474, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0006953945393655605, + "loss": 0.8096, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0007255072903400559, + "loss": 0.8075, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8337028824833703, + "eval_loss": 1.0200097560882568, + "eval_runtime": 6.6234, + "eval_samples_per_second": 544.737, + "eval_steps_per_second": 8.606, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0007226961175211446, + "loss": 0.8002, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8719512195121951, + "eval_loss": 0.8917744755744934, + "eval_runtime": 6.9893, + "eval_samples_per_second": 516.215, + "eval_steps_per_second": 8.155, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0007165878658830829, + "loss": 0.7952, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0007072385026047887, + "loss": 0.7972, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8542128603104213, + "eval_loss": 0.918285071849823, + "eval_runtime": 6.8586, + "eval_samples_per_second": 526.058, + "eval_steps_per_second": 8.311, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0006947336917221225, + "loss": 0.8019, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0006791880092266241, + "loss": 0.7858, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8805432372505543, + "eval_loss": 0.8827464580535889, + "eval_runtime": 6.8345, + "eval_samples_per_second": 527.912, + "eval_steps_per_second": 8.34, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0006607438932569269, + "loss": 0.7834, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8522727272727273, + "eval_loss": 0.9586824178695679, + "eval_runtime": 6.8639, + "eval_samples_per_second": 525.649, + "eval_steps_per_second": 8.304, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0006395703390017735, + "loss": 0.7993, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.000615861350272655, + "loss": 0.7693, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8849778270509978, + "eval_loss": 0.851362407207489, + "eval_runtime": 6.7212, + "eval_samples_per_second": 536.811, + "eval_steps_per_second": 8.481, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0005898341619336199, + "loss": 0.782, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0005617272494753354, + "loss": 0.7701, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8996674057649667, + "eval_loss": 0.8295415043830872, + "eval_runtime": 6.9137, + "eval_samples_per_second": 521.865, + "eval_steps_per_second": 8.245, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0005317981439707824, + "loss": 0.7652, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9004988913525499, + "eval_loss": 0.8346532583236694, + "eval_runtime": 6.551, + "eval_samples_per_second": 550.752, + "eval_steps_per_second": 8.701, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0005003210724331696, + "loss": 0.7582, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00046758444519641055, + "loss": 0.7569, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.8287192583084106, + "eval_runtime": 6.8224, + "eval_samples_per_second": 528.843, + "eval_steps_per_second": 8.355, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00043388821334017566, + "loss": 0.7495, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00039954112037224895, + "loss": 0.7453, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9004988913525499, + "eval_loss": 0.8321588635444641, + "eval_runtime": 6.848, + "eval_samples_per_second": 526.872, + "eval_steps_per_second": 8.324, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0003648578733497978, + "loss": 0.7504, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8930155210643016, + "eval_loss": 0.8359070420265198, + "eval_runtime": 6.99, + "eval_samples_per_second": 516.167, + "eval_steps_per_second": 8.155, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0003301562593593052, + "loss": 0.7376, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.0002957542337755712, + "loss": 0.7309, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.8891352549889135, + "eval_loss": 0.8470795750617981, + "eval_runtime": 6.6562, + "eval_samples_per_second": 542.055, + "eval_steps_per_second": 8.564, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00026196700697876283, + "loss": 0.7205, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8255519866943359, + "eval_runtime": 6.7841, + "eval_samples_per_second": 531.83, + "eval_steps_per_second": 8.402, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0002291041562226231, + "loss": 0.7285, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00019746678911649902, + "loss": 0.7198, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9013303769401331, + "eval_loss": 0.8290918469429016, + "eval_runtime": 7.0469, + "eval_samples_per_second": 511.999, + "eval_steps_per_second": 8.089, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.00016734478471093468, + "loss": 0.72, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.0001390141374655286, + "loss": 0.7092, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8250666260719299, + "eval_runtime": 7.0871, + "eval_samples_per_second": 509.097, + "eval_steps_per_second": 8.043, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 0.00011273442843508977, + "loss": 0.7045, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8112924098968506, + "eval_runtime": 6.8109, + "eval_samples_per_second": 529.736, + "eval_steps_per_second": 8.369, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 8.874644684448547e-05, + "loss": 0.7068, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 6.726998384462348e-05, + "loss": 0.7057, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8155537843704224, + "eval_runtime": 6.6072, + "eval_samples_per_second": 546.07, + "eval_steps_per_second": 8.627, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 4.850181866440002e-05, + "loss": 0.7034, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.261391561059961e-05, + "loss": 0.7026, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8113304376602173, + "eval_runtime": 6.7411, + "eval_samples_per_second": 535.222, + "eval_steps_per_second": 8.456, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.9751848435833424e-05, + "loss": 0.6995, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8166455030441284, + "eval_runtime": 7.0615, + "eval_samples_per_second": 510.941, + "eval_steps_per_second": 8.072, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.0033466511325826e-05, + "loss": 0.6945, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 3.5478150258136682e-06, + "loss": 0.6968, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8166415691375732, + "eval_runtime": 6.7901, + "eval_samples_per_second": 531.361, + "eval_steps_per_second": 8.395, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0007257066090302131, + "metric": "eval/loss", + "warmup_ratio": 0.3228237965163705 + } +} diff --git a/run-o83l1i0t/checkpoint-1232/training_args.bin b/run-o83l1i0t/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8e94d6ec73ade05ec7530ba3c7c91c73d1b80ddb --- /dev/null +++ b/run-o83l1i0t/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e238ce5ece7bcb78fdcdc0c336cf488a00a28693709ae180a3d7bc065295d98 +size 4792 diff --git a/run-o83l1i0t/checkpoint-1260/model.safetensors b/run-o83l1i0t/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08d86bae7f0208c52350c2f66979d018254d7c65 --- /dev/null +++ b/run-o83l1i0t/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9e382a24a9c896bdaff44d9600f4336413e5e7e0c7edd6b08c73b3ca007d4af +size 198025308 diff --git a/run-o83l1i0t/checkpoint-1260/optimizer.pt b/run-o83l1i0t/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..77acb93242cea966d57a02a6c1d49b220fe3cac4 --- /dev/null +++ b/run-o83l1i0t/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41e94e2f02b683455aafcae6d786e07a6684a6bfe2d31975970a62ff044d79a4 +size 395900602 diff --git a/run-o83l1i0t/checkpoint-1260/rng_state.pth b/run-o83l1i0t/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-o83l1i0t/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-o83l1i0t/checkpoint-1260/scheduler.pt b/run-o83l1i0t/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a45cc33e5c43ccc3a175bf1247b7b94d4ac13f7 --- /dev/null +++ b/run-o83l1i0t/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f948ae48a894e3969cc2f85027a1dc9cff5e0ed95114156abf1fee45a95b1a9 +size 1064 diff --git a/run-o83l1i0t/checkpoint-1260/trainer_state.json b/run-o83l1i0t/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b23b2b74daeb96c33d9bd33980db6a7c761ff8ee --- /dev/null +++ b/run-o83l1i0t/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.917960088691796, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-o83l1i0t/checkpoint-1232", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.6359635957704034e-05, + "loss": 1.2622, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8841463414634146, + "eval_loss": 0.8997307419776917, + "eval_runtime": 6.8432, + "eval_samples_per_second": 527.239, + "eval_steps_per_second": 8.329, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 9.271927191540807e-05, + "loss": 0.8926, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00013907890787311208, + "loss": 0.8184, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9035476718403548, + "eval_loss": 0.8259819149971008, + "eval_runtime": 6.7246, + "eval_samples_per_second": 536.534, + "eval_steps_per_second": 8.476, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00018543854383081614, + "loss": 0.8042, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8235899806022644, + "eval_runtime": 6.7893, + "eval_samples_per_second": 531.428, + "eval_steps_per_second": 8.396, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00023179817978852014, + "loss": 0.7955, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00027815781574622416, + "loss": 0.786, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8669623059866962, + "eval_loss": 0.8887752294540405, + "eval_runtime": 6.7087, + "eval_samples_per_second": 537.806, + "eval_steps_per_second": 8.496, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003245174517039282, + "loss": 0.779, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003708770876616323, + "loss": 0.7805, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.899390243902439, + "eval_loss": 0.8318032026290894, + "eval_runtime": 6.9016, + "eval_samples_per_second": 522.775, + "eval_steps_per_second": 8.259, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00041723672361933625, + "loss": 0.782, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8733370288248337, + "eval_loss": 0.8858808875083923, + "eval_runtime": 6.8227, + "eval_samples_per_second": 528.82, + "eval_steps_per_second": 8.354, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0004635963595770403, + "loss": 0.7781, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0005099559955347444, + "loss": 0.7806, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9004988913525499, + "eval_loss": 0.8367480039596558, + "eval_runtime": 6.6087, + "eval_samples_per_second": 545.946, + "eval_steps_per_second": 8.625, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0005563156314924483, + "loss": 0.7839, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0006026752674501524, + "loss": 0.7914, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8683481152993349, + "eval_loss": 0.8877268433570862, + "eval_runtime": 6.6358, + "eval_samples_per_second": 543.721, + "eval_steps_per_second": 8.59, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0006490349034078564, + "loss": 0.7889, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8572616407982262, + "eval_loss": 0.9073005318641663, + "eval_runtime": 6.7268, + "eval_samples_per_second": 536.363, + "eval_steps_per_second": 8.474, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0006953945393655605, + "loss": 0.8096, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0007255072903400559, + "loss": 0.8075, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8337028824833703, + "eval_loss": 1.0200097560882568, + "eval_runtime": 6.6234, + "eval_samples_per_second": 544.737, + "eval_steps_per_second": 8.606, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0007226961175211446, + "loss": 0.8002, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8719512195121951, + "eval_loss": 0.8917744755744934, + "eval_runtime": 6.9893, + "eval_samples_per_second": 516.215, + "eval_steps_per_second": 8.155, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0007165878658830829, + "loss": 0.7952, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0007072385026047887, + "loss": 0.7972, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8542128603104213, + "eval_loss": 0.918285071849823, + "eval_runtime": 6.8586, + "eval_samples_per_second": 526.058, + "eval_steps_per_second": 8.311, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0006947336917221225, + "loss": 0.8019, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0006791880092266241, + "loss": 0.7858, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8805432372505543, + "eval_loss": 0.8827464580535889, + "eval_runtime": 6.8345, + "eval_samples_per_second": 527.912, + "eval_steps_per_second": 8.34, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0006607438932569269, + "loss": 0.7834, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8522727272727273, + "eval_loss": 0.9586824178695679, + "eval_runtime": 6.8639, + "eval_samples_per_second": 525.649, + "eval_steps_per_second": 8.304, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0006395703390017735, + "loss": 0.7993, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.000615861350272655, + "loss": 0.7693, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8849778270509978, + "eval_loss": 0.851362407207489, + "eval_runtime": 6.7212, + "eval_samples_per_second": 536.811, + "eval_steps_per_second": 8.481, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0005898341619336199, + "loss": 0.782, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0005617272494753354, + "loss": 0.7701, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8996674057649667, + "eval_loss": 0.8295415043830872, + "eval_runtime": 6.9137, + "eval_samples_per_second": 521.865, + "eval_steps_per_second": 8.245, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0005317981439707824, + "loss": 0.7652, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9004988913525499, + "eval_loss": 0.8346532583236694, + "eval_runtime": 6.551, + "eval_samples_per_second": 550.752, + "eval_steps_per_second": 8.701, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0005003210724331696, + "loss": 0.7582, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00046758444519641055, + "loss": 0.7569, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.8287192583084106, + "eval_runtime": 6.8224, + "eval_samples_per_second": 528.843, + "eval_steps_per_second": 8.355, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00043388821334017566, + "loss": 0.7495, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00039954112037224895, + "loss": 0.7453, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9004988913525499, + "eval_loss": 0.8321588635444641, + "eval_runtime": 6.848, + "eval_samples_per_second": 526.872, + "eval_steps_per_second": 8.324, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0003648578733497978, + "loss": 0.7504, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8930155210643016, + "eval_loss": 0.8359070420265198, + "eval_runtime": 6.99, + "eval_samples_per_second": 516.167, + "eval_steps_per_second": 8.155, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0003301562593593052, + "loss": 0.7376, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.0002957542337755712, + "loss": 0.7309, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.8891352549889135, + "eval_loss": 0.8470795750617981, + "eval_runtime": 6.6562, + "eval_samples_per_second": 542.055, + "eval_steps_per_second": 8.564, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00026196700697876283, + "loss": 0.7205, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8255519866943359, + "eval_runtime": 6.7841, + "eval_samples_per_second": 531.83, + "eval_steps_per_second": 8.402, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0002291041562226231, + "loss": 0.7285, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00019746678911649902, + "loss": 0.7198, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9013303769401331, + "eval_loss": 0.8290918469429016, + "eval_runtime": 7.0469, + "eval_samples_per_second": 511.999, + "eval_steps_per_second": 8.089, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.00016734478471093468, + "loss": 0.72, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.0001390141374655286, + "loss": 0.7092, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8250666260719299, + "eval_runtime": 7.0871, + "eval_samples_per_second": 509.097, + "eval_steps_per_second": 8.043, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 0.00011273442843508977, + "loss": 0.7045, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8112924098968506, + "eval_runtime": 6.8109, + "eval_samples_per_second": 529.736, + "eval_steps_per_second": 8.369, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 8.874644684448547e-05, + "loss": 0.7068, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 6.726998384462348e-05, + "loss": 0.7057, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8155537843704224, + "eval_runtime": 6.6072, + "eval_samples_per_second": 546.07, + "eval_steps_per_second": 8.627, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 4.850181866440002e-05, + "loss": 0.7034, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.261391561059961e-05, + "loss": 0.7026, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8113304376602173, + "eval_runtime": 6.7411, + "eval_samples_per_second": 535.222, + "eval_steps_per_second": 8.456, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.9751848435833424e-05, + "loss": 0.6995, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8166455030441284, + "eval_runtime": 7.0615, + "eval_samples_per_second": 510.941, + "eval_steps_per_second": 8.072, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.0033466511325826e-05, + "loss": 0.6945, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 3.5478150258136682e-06, + "loss": 0.6968, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8166415691375732, + "eval_runtime": 6.7901, + "eval_samples_per_second": 531.361, + "eval_steps_per_second": 8.395, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 3.5431910429155567e-07, + "loss": 0.7, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8130980134010315, + "eval_runtime": 6.9641, + "eval_samples_per_second": 518.089, + "eval_steps_per_second": 8.185, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0007257066090302131, + "metric": "eval/loss", + "warmup_ratio": 0.3228237965163705 + } +} diff --git a/run-o83l1i0t/checkpoint-1260/training_args.bin b/run-o83l1i0t/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8e94d6ec73ade05ec7530ba3c7c91c73d1b80ddb --- /dev/null +++ b/run-o83l1i0t/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e238ce5ece7bcb78fdcdc0c336cf488a00a28693709ae180a3d7bc065295d98 +size 4792 diff --git a/run-okavh3jz/checkpoint-1232/model.safetensors b/run-okavh3jz/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..318c1c47b53ffe0205b2083cca6dcaa2034d995b --- /dev/null +++ b/run-okavh3jz/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e6e3e52eb277de6c0ca3286462cf8f02ee11dc6e0ea9a10c2fc051cfaa7107b +size 198025308 diff --git a/run-okavh3jz/checkpoint-1232/optimizer.pt b/run-okavh3jz/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec7312376c4ad2530e0659a78b64589c87b4cab2 --- /dev/null +++ b/run-okavh3jz/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b716461e397fb3ff9624c71bd13c6f151c11a11643091015ece692cbb65c99ca +size 395900602 diff --git a/run-okavh3jz/checkpoint-1232/rng_state.pth b/run-okavh3jz/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-okavh3jz/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-okavh3jz/checkpoint-1232/scheduler.pt b/run-okavh3jz/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f836f2c49fbf14b9d8b277dcc65206a404558604 --- /dev/null +++ b/run-okavh3jz/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:299a3425175cae025e790e224389f8e598e8a9c2ec0c27ddef60d229faa19b7d +size 1064 diff --git a/run-okavh3jz/checkpoint-1232/trainer_state.json b/run-okavh3jz/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..353e5e45e7d3ecbae3b11b276e1945dc02d38162 --- /dev/null +++ b/run-okavh3jz/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9157427937915743, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-okavh3jz/checkpoint-1147", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 7.221596137049498e-05, + "loss": 1.2106, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8938470066518847, + "eval_loss": 0.9590930342674255, + "eval_runtime": 6.7292, + "eval_samples_per_second": 536.169, + "eval_steps_per_second": 8.471, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00014443192274098997, + "loss": 0.8603, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021664788411148496, + "loss": 0.8099, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8988359201773836, + "eval_loss": 0.8354387879371643, + "eval_runtime": 6.6977, + "eval_samples_per_second": 538.695, + "eval_steps_per_second": 8.51, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00028886384548197993, + "loss": 0.8001, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9035476718403548, + "eval_loss": 0.8282513618469238, + "eval_runtime": 6.7524, + "eval_samples_per_second": 534.329, + "eval_steps_per_second": 8.441, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00036107980685247496, + "loss": 0.7993, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00043329576822296993, + "loss": 0.7937, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8422949002217295, + "eval_loss": 0.9453973770141602, + "eval_runtime": 6.7407, + "eval_samples_per_second": 535.254, + "eval_steps_per_second": 8.456, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0005055117295934649, + "loss": 0.7933, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0005777276909639599, + "loss": 0.8056, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8470066518847007, + "eval_loss": 0.9151757955551147, + "eval_runtime": 6.814, + "eval_samples_per_second": 529.495, + "eval_steps_per_second": 8.365, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.000649943652334455, + "loss": 0.8039, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.833980044345898, + "eval_loss": 0.9418761730194092, + "eval_runtime": 6.8716, + "eval_samples_per_second": 525.062, + "eval_steps_per_second": 8.295, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0007221596137049499, + "loss": 0.8103, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0007943755750754449, + "loss": 0.8203, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8702882483370288, + "eval_loss": 0.8939723372459412, + "eval_runtime": 6.7896, + "eval_samples_per_second": 531.402, + "eval_steps_per_second": 8.395, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0008665915364459399, + "loss": 0.8367, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0009388074978164348, + "loss": 0.8195, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.7730044345898004, + "eval_loss": 0.9994192719459534, + "eval_runtime": 6.573, + "eval_samples_per_second": 548.911, + "eval_steps_per_second": 8.672, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0009744235606315674, + "loss": 0.8296, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8553215077605322, + "eval_loss": 0.9039067029953003, + "eval_runtime": 6.6516, + "eval_samples_per_second": 542.43, + "eval_steps_per_second": 8.569, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0009704941726925261, + "loss": 0.8455, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0009626671035357318, + "loss": 0.8329, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.871119733924612, + "eval_loss": 0.8739569783210754, + "eval_runtime": 7.0125, + "eval_samples_per_second": 514.508, + "eval_steps_per_second": 8.128, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0009510055107582732, + "loss": 0.8305, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8716740576496674, + "eval_loss": 0.8831256031990051, + "eval_runtime": 6.7312, + "eval_samples_per_second": 536.01, + "eval_steps_per_second": 8.468, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0009356034932065125, + "loss": 0.8264, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00091658533168076, + "loss": 0.8284, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8672394678492239, + "eval_loss": 0.8913092613220215, + "eval_runtime": 6.6484, + "eval_samples_per_second": 542.69, + "eval_steps_per_second": 8.574, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0008941044860979971, + "loss": 0.8308, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0008683423572046702, + "loss": 0.8138, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8944013303769401, + "eval_loss": 0.8459126353263855, + "eval_runtime": 6.4619, + "eval_samples_per_second": 558.352, + "eval_steps_per_second": 8.821, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0008395068228314462, + "loss": 0.8022, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.86529933481153, + "eval_loss": 0.8961465954780579, + "eval_runtime": 6.5724, + "eval_samples_per_second": 548.96, + "eval_steps_per_second": 8.673, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0008078305605010734, + "loss": 0.8179, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0007735691699244347, + "loss": 0.7968, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8896895787139689, + "eval_loss": 0.8442733287811279, + "eval_runtime": 6.9736, + "eval_samples_per_second": 517.383, + "eval_steps_per_second": 8.174, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0007369991105346071, + "loss": 0.8057, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0006984154707012196, + "loss": 0.7924, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8899667405764967, + "eval_loss": 0.8458725810050964, + "eval_runtime": 6.8559, + "eval_samples_per_second": 526.262, + "eval_steps_per_second": 8.314, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.000658129586625601, + "loss": 0.7917, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8871951219512195, + "eval_loss": 0.8488112688064575, + "eval_runtime": 6.3552, + "eval_samples_per_second": 567.725, + "eval_steps_per_second": 8.969, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0006164665301301466, + "loss": 0.7844, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0005737624856132453, + "loss": 0.7785, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.8913525498891353, + "eval_loss": 0.8352165818214417, + "eval_runtime": 6.7584, + "eval_samples_per_second": 533.855, + "eval_steps_per_second": 8.434, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0005303620373354455, + "loss": 0.7694, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.0004866153889260869, + "loss": 0.7671, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.8982815964523282, + "eval_loss": 0.8364294171333313, + "eval_runtime": 6.5455, + "eval_samples_per_second": 551.214, + "eval_steps_per_second": 8.708, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0004428755375465426, + "loss": 0.771, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9035476718403548, + "eval_loss": 0.8185279369354248, + "eval_runtime": 6.9966, + "eval_samples_per_second": 515.678, + "eval_steps_per_second": 8.147, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0003994954255121051, + "loss": 0.7581, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00035682509235642965, + "loss": 0.7469, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.8977272727272727, + "eval_loss": 0.8398142457008362, + "eval_runtime": 6.9454, + "eval_samples_per_second": 519.477, + "eval_steps_per_second": 8.207, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00031520885031888777, + "loss": 0.741, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8257396221160889, + "eval_runtime": 6.9152, + "eval_samples_per_second": 521.752, + "eval_steps_per_second": 8.243, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0002749825060461692, + "loss": 0.7449, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00023647065092657495, + "loss": 0.7372, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8215458989143372, + "eval_runtime": 6.8721, + "eval_samples_per_second": 525.021, + "eval_steps_per_second": 8.294, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.00019998404192162598, + "loss": 0.7339, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.00016581709402938987, + "loss": 0.7261, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9065964523281597, + "eval_loss": 0.826031506061554, + "eval_runtime": 6.9981, + "eval_samples_per_second": 515.569, + "eval_steps_per_second": 8.145, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 0.00013424550461314512, + "loss": 0.7228, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8198442459106445, + "eval_runtime": 6.4862, + "eval_samples_per_second": 556.258, + "eval_steps_per_second": 8.788, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 0.00010552402876497783, + "loss": 0.7193, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 7.988442365517435e-05, + "loss": 0.716, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8266257643699646, + "eval_runtime": 6.8604, + "eval_samples_per_second": 525.917, + "eval_steps_per_second": 8.309, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 5.753357845471266e-05, + "loss": 0.7155, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.865184492074318e-05, + "loss": 0.7156, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8136420249938965, + "eval_runtime": 6.8726, + "eval_samples_per_second": 524.985, + "eval_steps_per_second": 8.294, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.3391582115768684e-05, + "loss": 0.7108, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8153643608093262, + "eval_runtime": 6.9535, + "eval_samples_per_second": 518.874, + "eval_steps_per_second": 8.197, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.1875927003370222e-05, + "loss": 0.7053, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 4.197800840691497e-06, + "loss": 0.7062, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8101699948310852, + "eval_runtime": 6.7699, + "eval_samples_per_second": 532.944, + "eval_steps_per_second": 8.42, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0009749154785016824, + "metric": "eval/loss", + "warmup_ratio": 0.27797593233292184 + } +} diff --git a/run-okavh3jz/checkpoint-1232/training_args.bin b/run-okavh3jz/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e22fedbbbeb4777884165cdf3b99f17e79597ed7 --- /dev/null +++ b/run-okavh3jz/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0277a74dd105dabd34cd2be17e07aaac42350665cf6523ed2ac5b3d51d304238 +size 4792 diff --git a/run-okavh3jz/checkpoint-1260/model.safetensors b/run-okavh3jz/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f54cc3712af8b1b6687312ce49ee46a68c68da99 --- /dev/null +++ b/run-okavh3jz/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85f622bb331f25d414718ed8155d0ba653294993f0e8374eaac0198288323491 +size 198025308 diff --git a/run-okavh3jz/checkpoint-1260/optimizer.pt b/run-okavh3jz/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..be83fba44f23197966661883492edc7dc1292553 --- /dev/null +++ b/run-okavh3jz/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:562ca54b1a5f0973760248fa444729dfb6821ce794ee79b99816f185a4b3680a +size 395900602 diff --git a/run-okavh3jz/checkpoint-1260/rng_state.pth b/run-okavh3jz/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-okavh3jz/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-okavh3jz/checkpoint-1260/scheduler.pt b/run-okavh3jz/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2167e0b6baf10e2499963593d801575493bfc735 --- /dev/null +++ b/run-okavh3jz/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f7371c48e38b100afae276fc15a78a3b41f94546bbb1a54253508e88e7ea773 +size 1064 diff --git a/run-okavh3jz/checkpoint-1260/trainer_state.json b/run-okavh3jz/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7000993923e6fb75f7957064fe1cdbcb0892c791 --- /dev/null +++ b/run-okavh3jz/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9185144124168514, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-okavh3jz/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 7.221596137049498e-05, + "loss": 1.2106, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8938470066518847, + "eval_loss": 0.9590930342674255, + "eval_runtime": 6.7292, + "eval_samples_per_second": 536.169, + "eval_steps_per_second": 8.471, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00014443192274098997, + "loss": 0.8603, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021664788411148496, + "loss": 0.8099, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8988359201773836, + "eval_loss": 0.8354387879371643, + "eval_runtime": 6.6977, + "eval_samples_per_second": 538.695, + "eval_steps_per_second": 8.51, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00028886384548197993, + "loss": 0.8001, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9035476718403548, + "eval_loss": 0.8282513618469238, + "eval_runtime": 6.7524, + "eval_samples_per_second": 534.329, + "eval_steps_per_second": 8.441, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00036107980685247496, + "loss": 0.7993, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00043329576822296993, + "loss": 0.7937, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8422949002217295, + "eval_loss": 0.9453973770141602, + "eval_runtime": 6.7407, + "eval_samples_per_second": 535.254, + "eval_steps_per_second": 8.456, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0005055117295934649, + "loss": 0.7933, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0005777276909639599, + "loss": 0.8056, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8470066518847007, + "eval_loss": 0.9151757955551147, + "eval_runtime": 6.814, + "eval_samples_per_second": 529.495, + "eval_steps_per_second": 8.365, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.000649943652334455, + "loss": 0.8039, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.833980044345898, + "eval_loss": 0.9418761730194092, + "eval_runtime": 6.8716, + "eval_samples_per_second": 525.062, + "eval_steps_per_second": 8.295, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0007221596137049499, + "loss": 0.8103, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0007943755750754449, + "loss": 0.8203, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8702882483370288, + "eval_loss": 0.8939723372459412, + "eval_runtime": 6.7896, + "eval_samples_per_second": 531.402, + "eval_steps_per_second": 8.395, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0008665915364459399, + "loss": 0.8367, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0009388074978164348, + "loss": 0.8195, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.7730044345898004, + "eval_loss": 0.9994192719459534, + "eval_runtime": 6.573, + "eval_samples_per_second": 548.911, + "eval_steps_per_second": 8.672, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0009744235606315674, + "loss": 0.8296, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8553215077605322, + "eval_loss": 0.9039067029953003, + "eval_runtime": 6.6516, + "eval_samples_per_second": 542.43, + "eval_steps_per_second": 8.569, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0009704941726925261, + "loss": 0.8455, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0009626671035357318, + "loss": 0.8329, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.871119733924612, + "eval_loss": 0.8739569783210754, + "eval_runtime": 7.0125, + "eval_samples_per_second": 514.508, + "eval_steps_per_second": 8.128, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0009510055107582732, + "loss": 0.8305, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8716740576496674, + "eval_loss": 0.8831256031990051, + "eval_runtime": 6.7312, + "eval_samples_per_second": 536.01, + "eval_steps_per_second": 8.468, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0009356034932065125, + "loss": 0.8264, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00091658533168076, + "loss": 0.8284, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8672394678492239, + "eval_loss": 0.8913092613220215, + "eval_runtime": 6.6484, + "eval_samples_per_second": 542.69, + "eval_steps_per_second": 8.574, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0008941044860979971, + "loss": 0.8308, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0008683423572046702, + "loss": 0.8138, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8944013303769401, + "eval_loss": 0.8459126353263855, + "eval_runtime": 6.4619, + "eval_samples_per_second": 558.352, + "eval_steps_per_second": 8.821, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0008395068228314462, + "loss": 0.8022, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.86529933481153, + "eval_loss": 0.8961465954780579, + "eval_runtime": 6.5724, + "eval_samples_per_second": 548.96, + "eval_steps_per_second": 8.673, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0008078305605010734, + "loss": 0.8179, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0007735691699244347, + "loss": 0.7968, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8896895787139689, + "eval_loss": 0.8442733287811279, + "eval_runtime": 6.9736, + "eval_samples_per_second": 517.383, + "eval_steps_per_second": 8.174, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0007369991105346071, + "loss": 0.8057, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0006984154707012196, + "loss": 0.7924, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8899667405764967, + "eval_loss": 0.8458725810050964, + "eval_runtime": 6.8559, + "eval_samples_per_second": 526.262, + "eval_steps_per_second": 8.314, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.000658129586625601, + "loss": 0.7917, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8871951219512195, + "eval_loss": 0.8488112688064575, + "eval_runtime": 6.3552, + "eval_samples_per_second": 567.725, + "eval_steps_per_second": 8.969, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0006164665301301466, + "loss": 0.7844, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0005737624856132453, + "loss": 0.7785, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.8913525498891353, + "eval_loss": 0.8352165818214417, + "eval_runtime": 6.7584, + "eval_samples_per_second": 533.855, + "eval_steps_per_second": 8.434, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0005303620373354455, + "loss": 0.7694, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.0004866153889260869, + "loss": 0.7671, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.8982815964523282, + "eval_loss": 0.8364294171333313, + "eval_runtime": 6.5455, + "eval_samples_per_second": 551.214, + "eval_steps_per_second": 8.708, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0004428755375465426, + "loss": 0.771, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9035476718403548, + "eval_loss": 0.8185279369354248, + "eval_runtime": 6.9966, + "eval_samples_per_second": 515.678, + "eval_steps_per_second": 8.147, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0003994954255121051, + "loss": 0.7581, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00035682509235642965, + "loss": 0.7469, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.8977272727272727, + "eval_loss": 0.8398142457008362, + "eval_runtime": 6.9454, + "eval_samples_per_second": 519.477, + "eval_steps_per_second": 8.207, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00031520885031888777, + "loss": 0.741, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8257396221160889, + "eval_runtime": 6.9152, + "eval_samples_per_second": 521.752, + "eval_steps_per_second": 8.243, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.0002749825060461692, + "loss": 0.7449, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00023647065092657495, + "loss": 0.7372, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8215458989143372, + "eval_runtime": 6.8721, + "eval_samples_per_second": 525.021, + "eval_steps_per_second": 8.294, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.00019998404192162598, + "loss": 0.7339, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.00016581709402938987, + "loss": 0.7261, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9065964523281597, + "eval_loss": 0.826031506061554, + "eval_runtime": 6.9981, + "eval_samples_per_second": 515.569, + "eval_steps_per_second": 8.145, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 0.00013424550461314512, + "loss": 0.7228, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8198442459106445, + "eval_runtime": 6.4862, + "eval_samples_per_second": 556.258, + "eval_steps_per_second": 8.788, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 0.00010552402876497783, + "loss": 0.7193, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 7.988442365517435e-05, + "loss": 0.716, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8266257643699646, + "eval_runtime": 6.8604, + "eval_samples_per_second": 525.917, + "eval_steps_per_second": 8.309, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 5.753357845471266e-05, + "loss": 0.7155, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.865184492074318e-05, + "loss": 0.7156, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8136420249938965, + "eval_runtime": 6.8726, + "eval_samples_per_second": 524.985, + "eval_steps_per_second": 8.294, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.3391582115768684e-05, + "loss": 0.7108, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8153643608093262, + "eval_runtime": 6.9535, + "eval_samples_per_second": 518.874, + "eval_steps_per_second": 8.197, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.1875927003370222e-05, + "loss": 0.7053, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 4.197800840691497e-06, + "loss": 0.7062, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8101699948310852, + "eval_runtime": 6.7699, + "eval_samples_per_second": 532.944, + "eval_steps_per_second": 8.42, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 4.1915938522819337e-07, + "loss": 0.7105, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8053973317146301, + "eval_runtime": 6.8045, + "eval_samples_per_second": 530.239, + "eval_steps_per_second": 8.377, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0009749154785016824, + "metric": "eval/loss", + "warmup_ratio": 0.27797593233292184 + } +} diff --git a/run-okavh3jz/checkpoint-1260/training_args.bin b/run-okavh3jz/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e22fedbbbeb4777884165cdf3b99f17e79597ed7 --- /dev/null +++ b/run-okavh3jz/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0277a74dd105dabd34cd2be17e07aaac42350665cf6523ed2ac5b3d51d304238 +size 4792 diff --git a/run-op4sn8c0/checkpoint-616/model.safetensors b/run-op4sn8c0/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1806cbfdd98f3b82ff9a19be8652b413585c501a --- /dev/null +++ b/run-op4sn8c0/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e27f233fd2473a4e70014c25ca23d0561034577ac875a0ea8d01a591f39e23f2 +size 198025308 diff --git a/run-op4sn8c0/checkpoint-616/optimizer.pt b/run-op4sn8c0/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f5955353ec65ba6acf5d70b9bd1baeea27ace71 --- /dev/null +++ b/run-op4sn8c0/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28b0b2eb48492ee8dcde96d44955e2a16d850322ba08a973112afdfadccc2e43 +size 395900602 diff --git a/run-op4sn8c0/checkpoint-616/rng_state.pth b/run-op4sn8c0/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-op4sn8c0/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-op4sn8c0/checkpoint-616/scheduler.pt b/run-op4sn8c0/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c42da689253c03c7490a904a2ebb9b573e69de9 --- /dev/null +++ b/run-op4sn8c0/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea30b9c962c64e7b224854ae2595f1d9ad66438643b742e78067fba5813c2162 +size 1064 diff --git a/run-op4sn8c0/checkpoint-616/trainer_state.json b/run-op4sn8c0/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1af221eb85f3e56f605b78b639a80d5d3f33fa55 --- /dev/null +++ b/run-op4sn8c0/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9180425092382838, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-op4sn8c0/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 5.869295841849924e-06, + "loss": 1.5191, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.7965631929046563, + "eval_f1": 0.7423295486214312, + "eval_loss": 1.3226805925369263, + "eval_precision": 0.7100810036231323, + "eval_recall": 0.7965631929046563, + "eval_runtime": 8.1801, + "eval_samples_per_second": 441.068, + "eval_steps_per_second": 3.545, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 1.1738591683699848e-05, + "loss": 1.3637, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 1.760788752554977e-05, + "loss": 1.0878, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.7513704434640557, + "eval_loss": 0.9711477160453796, + "eval_precision": 0.7699235349840203, + "eval_recall": 0.8287139689578714, + "eval_runtime": 7.767, + "eval_samples_per_second": 464.53, + "eval_steps_per_second": 3.734, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 2.3477183367399697e-05, + "loss": 0.9581, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8761086474501109, + "eval_f1": 0.8416331363677444, + "eval_loss": 0.893968939781189, + "eval_precision": 0.871742014030393, + "eval_recall": 0.8761086474501109, + "eval_runtime": 8.2891, + "eval_samples_per_second": 435.269, + "eval_steps_per_second": 3.499, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 2.934647920924962e-05, + "loss": 0.908, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 3.521577505109954e-05, + "loss": 0.8629, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.8983914067309089, + "eval_loss": 0.8388552665710449, + "eval_precision": 0.9014570162875383, + "eval_recall": 0.907150776053215, + "eval_runtime": 8.122, + "eval_samples_per_second": 444.223, + "eval_steps_per_second": 3.571, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 4.108507089294947e-05, + "loss": 0.8389, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 4.2861065966768006e-05, + "loss": 0.8042, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.8970144868267157, + "eval_loss": 0.824699342250824, + "eval_precision": 0.895190691400972, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.8702, + "eval_samples_per_second": 458.436, + "eval_steps_per_second": 3.685, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 4.2712301835516054e-05, + "loss": 0.8014, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9096323387640634, + "eval_loss": 0.8049204349517822, + "eval_precision": 0.9056333151145155, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.4869, + "eval_samples_per_second": 425.128, + "eval_steps_per_second": 3.417, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 4.243966658123231e-05, + "loss": 0.7936, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 4.20447482020464e-05, + "loss": 0.7889, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9029229651525811, + "eval_loss": 0.8012121319770813, + "eval_precision": 0.8989517183681658, + "eval_recall": 0.9099223946784922, + "eval_runtime": 7.9055, + "eval_samples_per_second": 456.39, + "eval_steps_per_second": 3.668, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 4.152984694943955e-05, + "loss": 0.7864, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 4.0897961930142194e-05, + "loss": 0.7743, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9076201485012886, + "eval_loss": 0.8059703707695007, + "eval_precision": 0.9050381098794386, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.1162, + "eval_samples_per_second": 444.542, + "eval_steps_per_second": 3.573, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 4.015277363746183e-05, + "loss": 0.7676, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9111280690696913, + "eval_loss": 0.7946896553039551, + "eval_precision": 0.9075294756850688, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.9912, + "eval_samples_per_second": 451.498, + "eval_steps_per_second": 3.629, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 3.929862251378952e-05, + "loss": 0.7732, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 3.8340483669150435e-05, + "loss": 0.7657, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9147615582135056, + "eval_loss": 0.7952580451965332, + "eval_precision": 0.9105684094066431, + "eval_recall": 0.9204545454545454, + "eval_runtime": 7.5261, + "eval_samples_per_second": 479.396, + "eval_steps_per_second": 3.853, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 3.7283937903053435e-05, + "loss": 0.7548, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9101119078837723, + "eval_loss": 0.8107021450996399, + "eval_precision": 0.9105297353348769, + "eval_recall": 0.9110310421286031, + "eval_runtime": 8.0679, + "eval_samples_per_second": 447.205, + "eval_steps_per_second": 3.594, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 3.6135139198426445e-05, + "loss": 0.7685, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 3.4900778876973444e-05, + "loss": 0.7567, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9155084171814515, + "eval_loss": 0.7982032299041748, + "eval_precision": 0.9121002322468377, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.022, + "eval_samples_per_second": 449.764, + "eval_steps_per_second": 3.615, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 3.358804662473447e-05, + "loss": 0.747, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 3.220458861486025e-05, + "loss": 0.7489, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9093897695689978, + "eval_loss": 0.8012551069259644, + "eval_precision": 0.9057952147383399, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.5759, + "eval_samples_per_second": 476.25, + "eval_steps_per_second": 3.828, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 3.075846297152056e-05, + "loss": 0.7441, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9079876086634898, + "eval_loss": 0.8018399477005005, + "eval_precision": 0.9056989419542447, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.7396, + "eval_samples_per_second": 466.174, + "eval_steps_per_second": 3.747, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 2.9258092834352503e-05, + "loss": 0.7456, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 2.7712217296830298e-05, + "loss": 0.7449, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9105274895251003, + "eval_loss": 0.799005925655365, + "eval_precision": 0.9087264727667718, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.0262, + "eval_samples_per_second": 449.529, + "eval_steps_per_second": 3.613, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 2.6129840504322652e-05, + "loss": 0.7419, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 2.452017920832219e-05, + "loss": 0.7428, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9156388076692723, + "eval_loss": 0.7932678461074829, + "eval_precision": 0.9117208185809739, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.7606, + "eval_samples_per_second": 464.912, + "eval_steps_per_second": 3.737, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 2.2892609082323803e-05, + "loss": 0.7346, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9148467091269405, + "eval_loss": 0.791762113571167, + "eval_precision": 0.9112329235845791, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.8002, + "eval_samples_per_second": 462.553, + "eval_steps_per_second": 3.718, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 2.125661011204212e-05, + "loss": 0.7397, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 1.9621711378049235e-05, + "loss": 0.7332, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.913536046637672, + "eval_loss": 0.7962279319763184, + "eval_precision": 0.9098826498528608, + "eval_recall": 0.9176829268292683, + "eval_runtime": 8.4014, + "eval_samples_per_second": 429.45, + "eval_steps_per_second": 3.452, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 1.799743555245341e-05, + "loss": 0.7315, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 1.639324343290463e-05, + "loss": 0.7317, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9130865673809098, + "eval_loss": 0.7980937957763672, + "eval_precision": 0.9103573391213672, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.8733, + "eval_samples_per_second": 458.259, + "eval_steps_per_second": 3.683, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 1.4818478836995692e-05, + "loss": 0.7292, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.9053662965572726, + "eval_loss": 0.8129709362983704, + "eval_precision": 0.9048729835624054, + "eval_recall": 0.9065964523281597, + "eval_runtime": 7.6819, + "eval_samples_per_second": 469.678, + "eval_steps_per_second": 3.775, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 1.328231417802825e-05, + "loss": 0.7288, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 1.1793697039144618e-05, + "loss": 0.7306, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9153538349348589, + "eval_loss": 0.7941568493843079, + "eval_precision": 0.9124658554941831, + "eval_recall": 0.9193458980044346, + "eval_runtime": 7.7668, + "eval_samples_per_second": 464.54, + "eval_steps_per_second": 3.734, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 1.0361298057010887e-05, + "loss": 0.7292, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9089477943039547, + "eval_loss": 0.8033773899078369, + "eval_precision": 0.9049041711343716, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.8149, + "eval_samples_per_second": 461.685, + "eval_steps_per_second": 3.711, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 8.993460418609512e-06, + "loss": 0.7263, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 7.69815126530345e-06, + "loss": 0.7265, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9140241970612899, + "eval_loss": 0.7956911325454712, + "eval_precision": 0.9166004325066892, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.6354, + "eval_samples_per_second": 472.537, + "eval_steps_per_second": 3.798, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 6.482915287225047e-06, + "loss": 0.7249, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 5.354830778284994e-06, + "loss": 0.7257, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9143189759969924, + "eval_loss": 0.7966518402099609, + "eval_precision": 0.9107598347578122, + "eval_recall": 0.9193458980044346, + "eval_runtime": 7.8168, + "eval_samples_per_second": 461.57, + "eval_steps_per_second": 3.71, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 4.320468407764331e-06, + "loss": 0.7254, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9156704977780687, + "eval_loss": 0.7977079153060913, + "eval_precision": 0.9118565336589846, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.5991, + "eval_samples_per_second": 474.795, + "eval_steps_per_second": 3.816, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 3.385852948629583e-06, + "loss": 0.7171, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 2.5564281854893117e-06, + "loss": 0.7253, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9157644225620017, + "eval_loss": 0.796122670173645, + "eval_precision": 0.9123144115378639, + "eval_recall": 0.91990022172949, + "eval_runtime": 8.158, + "eval_samples_per_second": 442.267, + "eval_steps_per_second": 3.555, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.8370252065899274e-06, + "loss": 0.7248, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 1.2318342645380982e-06, + "loss": 0.7212, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9103659156520163, + "eval_loss": 0.8033692240715027, + "eval_precision": 0.9073890190460301, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.9562, + "eval_samples_per_second": 453.482, + "eval_steps_per_second": 3.645, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 7.443803696505056e-07, + "loss": 0.7225, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9148793552579334, + "eval_loss": 0.7972266674041748, + "eval_precision": 0.9113045934322207, + "eval_recall": 0.9187915742793792, + "eval_runtime": 8.2067, + "eval_samples_per_second": 439.641, + "eval_steps_per_second": 3.534, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 3.7750275809046586e-07, + "loss": 0.7247, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.333383543818683e-07, + "loss": 0.7246, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9221175166297118, + "eval_f1": 0.9180425092382838, + "eval_loss": 0.7936407923698425, + "eval_precision": 0.9207745632797418, + "eval_recall": 0.9221175166297118, + "eval_runtime": 8.0167, + "eval_samples_per_second": 450.058, + "eval_steps_per_second": 3.617, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4065110008787439, + "learning_rate": 4.2891008075057136e-05, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-op4sn8c0/checkpoint-616/training_args.bin b/run-op4sn8c0/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..6941bb267c4da1efcf1919fa30f871ec8a9a910c --- /dev/null +++ b/run-op4sn8c0/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fef9ea221e288b643ea0f4cdeb76a50b810fbc6fe4ac72f9c967505a486fcaf +size 4792 diff --git a/run-op4sn8c0/checkpoint-630/model.safetensors b/run-op4sn8c0/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..076daf7466dc828e0c074cc22108f77ffc265636 --- /dev/null +++ b/run-op4sn8c0/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ca8c6844b5e5d4dc77613c2aa2146b6510d2e63b033d10134d1c6ec935fd7f6 +size 198025308 diff --git a/run-op4sn8c0/checkpoint-630/optimizer.pt b/run-op4sn8c0/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..55fd3c1e77510c0f92b696182f0cb8adf3509f13 --- /dev/null +++ b/run-op4sn8c0/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:980a10ca54c2a01da4fcc550dd1f2ba7d2b58b14435781c5e1c498d0cf293a4d +size 395900602 diff --git a/run-op4sn8c0/checkpoint-630/rng_state.pth b/run-op4sn8c0/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-op4sn8c0/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-op4sn8c0/checkpoint-630/scheduler.pt b/run-op4sn8c0/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..86971862c5cc4448c3ca0c6a3ad736b47aa5df72 --- /dev/null +++ b/run-op4sn8c0/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76a5a0c4d9615c488f9be65e18d2868c9290cbd25f925ef497f35c5d347a0c6e +size 1064 diff --git a/run-op4sn8c0/checkpoint-630/trainer_state.json b/run-op4sn8c0/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6a260c9dfab6482151f930ac5b57320d5ee634c2 --- /dev/null +++ b/run-op4sn8c0/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9180425092382838, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-op4sn8c0/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 5.869295841849924e-06, + "loss": 1.5191, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.7965631929046563, + "eval_f1": 0.7423295486214312, + "eval_loss": 1.3226805925369263, + "eval_precision": 0.7100810036231323, + "eval_recall": 0.7965631929046563, + "eval_runtime": 8.1801, + "eval_samples_per_second": 441.068, + "eval_steps_per_second": 3.545, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 1.1738591683699848e-05, + "loss": 1.3637, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 1.760788752554977e-05, + "loss": 1.0878, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.7513704434640557, + "eval_loss": 0.9711477160453796, + "eval_precision": 0.7699235349840203, + "eval_recall": 0.8287139689578714, + "eval_runtime": 7.767, + "eval_samples_per_second": 464.53, + "eval_steps_per_second": 3.734, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 2.3477183367399697e-05, + "loss": 0.9581, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8761086474501109, + "eval_f1": 0.8416331363677444, + "eval_loss": 0.893968939781189, + "eval_precision": 0.871742014030393, + "eval_recall": 0.8761086474501109, + "eval_runtime": 8.2891, + "eval_samples_per_second": 435.269, + "eval_steps_per_second": 3.499, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 2.934647920924962e-05, + "loss": 0.908, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 3.521577505109954e-05, + "loss": 0.8629, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.8983914067309089, + "eval_loss": 0.8388552665710449, + "eval_precision": 0.9014570162875383, + "eval_recall": 0.907150776053215, + "eval_runtime": 8.122, + "eval_samples_per_second": 444.223, + "eval_steps_per_second": 3.571, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 4.108507089294947e-05, + "loss": 0.8389, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 4.2861065966768006e-05, + "loss": 0.8042, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.8970144868267157, + "eval_loss": 0.824699342250824, + "eval_precision": 0.895190691400972, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.8702, + "eval_samples_per_second": 458.436, + "eval_steps_per_second": 3.685, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 4.2712301835516054e-05, + "loss": 0.8014, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9096323387640634, + "eval_loss": 0.8049204349517822, + "eval_precision": 0.9056333151145155, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.4869, + "eval_samples_per_second": 425.128, + "eval_steps_per_second": 3.417, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 4.243966658123231e-05, + "loss": 0.7936, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 4.20447482020464e-05, + "loss": 0.7889, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9029229651525811, + "eval_loss": 0.8012121319770813, + "eval_precision": 0.8989517183681658, + "eval_recall": 0.9099223946784922, + "eval_runtime": 7.9055, + "eval_samples_per_second": 456.39, + "eval_steps_per_second": 3.668, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 4.152984694943955e-05, + "loss": 0.7864, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 4.0897961930142194e-05, + "loss": 0.7743, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9076201485012886, + "eval_loss": 0.8059703707695007, + "eval_precision": 0.9050381098794386, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.1162, + "eval_samples_per_second": 444.542, + "eval_steps_per_second": 3.573, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 4.015277363746183e-05, + "loss": 0.7676, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9111280690696913, + "eval_loss": 0.7946896553039551, + "eval_precision": 0.9075294756850688, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.9912, + "eval_samples_per_second": 451.498, + "eval_steps_per_second": 3.629, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 3.929862251378952e-05, + "loss": 0.7732, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 3.8340483669150435e-05, + "loss": 0.7657, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9147615582135056, + "eval_loss": 0.7952580451965332, + "eval_precision": 0.9105684094066431, + "eval_recall": 0.9204545454545454, + "eval_runtime": 7.5261, + "eval_samples_per_second": 479.396, + "eval_steps_per_second": 3.853, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 3.7283937903053435e-05, + "loss": 0.7548, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9101119078837723, + "eval_loss": 0.8107021450996399, + "eval_precision": 0.9105297353348769, + "eval_recall": 0.9110310421286031, + "eval_runtime": 8.0679, + "eval_samples_per_second": 447.205, + "eval_steps_per_second": 3.594, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 3.6135139198426445e-05, + "loss": 0.7685, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 3.4900778876973444e-05, + "loss": 0.7567, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9155084171814515, + "eval_loss": 0.7982032299041748, + "eval_precision": 0.9121002322468377, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.022, + "eval_samples_per_second": 449.764, + "eval_steps_per_second": 3.615, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 3.358804662473447e-05, + "loss": 0.747, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 3.220458861486025e-05, + "loss": 0.7489, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9093897695689978, + "eval_loss": 0.8012551069259644, + "eval_precision": 0.9057952147383399, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.5759, + "eval_samples_per_second": 476.25, + "eval_steps_per_second": 3.828, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 3.075846297152056e-05, + "loss": 0.7441, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9079876086634898, + "eval_loss": 0.8018399477005005, + "eval_precision": 0.9056989419542447, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.7396, + "eval_samples_per_second": 466.174, + "eval_steps_per_second": 3.747, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 2.9258092834352503e-05, + "loss": 0.7456, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 2.7712217296830298e-05, + "loss": 0.7449, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9105274895251003, + "eval_loss": 0.799005925655365, + "eval_precision": 0.9087264727667718, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.0262, + "eval_samples_per_second": 449.529, + "eval_steps_per_second": 3.613, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 2.6129840504322652e-05, + "loss": 0.7419, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 2.452017920832219e-05, + "loss": 0.7428, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9156388076692723, + "eval_loss": 0.7932678461074829, + "eval_precision": 0.9117208185809739, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.7606, + "eval_samples_per_second": 464.912, + "eval_steps_per_second": 3.737, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 2.2892609082323803e-05, + "loss": 0.7346, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9148467091269405, + "eval_loss": 0.791762113571167, + "eval_precision": 0.9112329235845791, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.8002, + "eval_samples_per_second": 462.553, + "eval_steps_per_second": 3.718, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 2.125661011204212e-05, + "loss": 0.7397, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 1.9621711378049235e-05, + "loss": 0.7332, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.913536046637672, + "eval_loss": 0.7962279319763184, + "eval_precision": 0.9098826498528608, + "eval_recall": 0.9176829268292683, + "eval_runtime": 8.4014, + "eval_samples_per_second": 429.45, + "eval_steps_per_second": 3.452, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 1.799743555245341e-05, + "loss": 0.7315, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 1.639324343290463e-05, + "loss": 0.7317, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9130865673809098, + "eval_loss": 0.7980937957763672, + "eval_precision": 0.9103573391213672, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.8733, + "eval_samples_per_second": 458.259, + "eval_steps_per_second": 3.683, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 1.4818478836995692e-05, + "loss": 0.7292, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.9053662965572726, + "eval_loss": 0.8129709362983704, + "eval_precision": 0.9048729835624054, + "eval_recall": 0.9065964523281597, + "eval_runtime": 7.6819, + "eval_samples_per_second": 469.678, + "eval_steps_per_second": 3.775, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 1.328231417802825e-05, + "loss": 0.7288, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 1.1793697039144618e-05, + "loss": 0.7306, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9153538349348589, + "eval_loss": 0.7941568493843079, + "eval_precision": 0.9124658554941831, + "eval_recall": 0.9193458980044346, + "eval_runtime": 7.7668, + "eval_samples_per_second": 464.54, + "eval_steps_per_second": 3.734, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 1.0361298057010887e-05, + "loss": 0.7292, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9089477943039547, + "eval_loss": 0.8033773899078369, + "eval_precision": 0.9049041711343716, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.8149, + "eval_samples_per_second": 461.685, + "eval_steps_per_second": 3.711, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 8.993460418609512e-06, + "loss": 0.7263, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 7.69815126530345e-06, + "loss": 0.7265, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9140241970612899, + "eval_loss": 0.7956911325454712, + "eval_precision": 0.9166004325066892, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.6354, + "eval_samples_per_second": 472.537, + "eval_steps_per_second": 3.798, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 6.482915287225047e-06, + "loss": 0.7249, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 5.354830778284994e-06, + "loss": 0.7257, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9143189759969924, + "eval_loss": 0.7966518402099609, + "eval_precision": 0.9107598347578122, + "eval_recall": 0.9193458980044346, + "eval_runtime": 7.8168, + "eval_samples_per_second": 461.57, + "eval_steps_per_second": 3.71, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 4.320468407764331e-06, + "loss": 0.7254, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9156704977780687, + "eval_loss": 0.7977079153060913, + "eval_precision": 0.9118565336589846, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.5991, + "eval_samples_per_second": 474.795, + "eval_steps_per_second": 3.816, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 3.385852948629583e-06, + "loss": 0.7171, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 2.5564281854893117e-06, + "loss": 0.7253, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9157644225620017, + "eval_loss": 0.796122670173645, + "eval_precision": 0.9123144115378639, + "eval_recall": 0.91990022172949, + "eval_runtime": 8.158, + "eval_samples_per_second": 442.267, + "eval_steps_per_second": 3.555, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.8370252065899274e-06, + "loss": 0.7248, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 1.2318342645380982e-06, + "loss": 0.7212, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9103659156520163, + "eval_loss": 0.8033692240715027, + "eval_precision": 0.9073890190460301, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.9562, + "eval_samples_per_second": 453.482, + "eval_steps_per_second": 3.645, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 7.443803696505056e-07, + "loss": 0.7225, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9148793552579334, + "eval_loss": 0.7972266674041748, + "eval_precision": 0.9113045934322207, + "eval_recall": 0.9187915742793792, + "eval_runtime": 8.2067, + "eval_samples_per_second": 439.641, + "eval_steps_per_second": 3.534, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 3.7750275809046586e-07, + "loss": 0.7247, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.333383543818683e-07, + "loss": 0.7246, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9221175166297118, + "eval_f1": 0.9180425092382838, + "eval_loss": 0.7936407923698425, + "eval_precision": 0.9207745632797418, + "eval_recall": 0.9221175166297118, + "eval_runtime": 8.0167, + "eval_samples_per_second": 450.058, + "eval_steps_per_second": 3.617, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 1.330932462508827e-08, + "loss": 0.7216, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9109471589423197, + "eval_loss": 0.7991477251052856, + "eval_precision": 0.9136805964449902, + "eval_recall": 0.9151884700665188, + "eval_runtime": 7.5552, + "eval_samples_per_second": 477.554, + "eval_steps_per_second": 3.838, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4065110008787439, + "learning_rate": 4.2891008075057136e-05, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-op4sn8c0/checkpoint-630/training_args.bin b/run-op4sn8c0/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..6941bb267c4da1efcf1919fa30f871ec8a9a910c --- /dev/null +++ b/run-op4sn8c0/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fef9ea221e288b643ea0f4cdeb76a50b810fbc6fe4ac72f9c967505a486fcaf +size 4792 diff --git a/run-p5tu6jza/checkpoint-616/model.safetensors b/run-p5tu6jza/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96b67d548ce0254dfaad149d6496b15ada04f97b --- /dev/null +++ b/run-p5tu6jza/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa4d23d85735e9edbda1b4d201e0188433192c2b562e87f4787cb9156dd8ad75 +size 198025308 diff --git a/run-p5tu6jza/checkpoint-616/optimizer.pt b/run-p5tu6jza/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..714427a9bd51d570495e92e5748c52a8f3f964bf --- /dev/null +++ b/run-p5tu6jza/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6200bebf950a09d6972ecc3df0b1b78039366b35f98bedf45de87f620a2d2bda +size 395900602 diff --git a/run-p5tu6jza/checkpoint-616/rng_state.pth b/run-p5tu6jza/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-p5tu6jza/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-p5tu6jza/checkpoint-616/scheduler.pt b/run-p5tu6jza/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..31836e5f0935598b09e9a6adac812af23fd0f7d4 --- /dev/null +++ b/run-p5tu6jza/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbb7bd4486f04996247fc134dea0511d3d627ad4a0e6a108a6b4a260f4a104c4 +size 1064 diff --git a/run-p5tu6jza/checkpoint-616/trainer_state.json b/run-p5tu6jza/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5f7eba0c2e20f076778536addd4676540888e38b --- /dev/null +++ b/run-p5tu6jza/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9143650745105159, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-p5tu6jza/checkpoint-573", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.633205284590377e-05, + "loss": 1.3188, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.1513012647628784, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.8688, + "eval_samples_per_second": 458.523, + "eval_steps_per_second": 3.685, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00013266410569180755, + "loss": 0.9628, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00019899615853771128, + "loss": 0.8381, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8841463414634146, + "eval_f1": 0.8854543675055913, + "eval_loss": 0.8711126446723938, + "eval_precision": 0.8950059921865504, + "eval_recall": 0.8841463414634146, + "eval_runtime": 7.9882, + "eval_samples_per_second": 451.665, + "eval_steps_per_second": 3.63, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.0002653282113836151, + "loss": 0.7974, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.9033291552565315, + "eval_loss": 0.8077923655509949, + "eval_precision": 0.9041433894860506, + "eval_recall": 0.9093680709534369, + "eval_runtime": 8.0908, + "eval_samples_per_second": 445.936, + "eval_steps_per_second": 3.584, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003316602642295188, + "loss": 0.7923, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00039799231707542257, + "loss": 0.7769, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.88332540628377, + "eval_loss": 0.825872004032135, + "eval_precision": 0.888254096302808, + "eval_recall": 0.8991130820399114, + "eval_runtime": 8.1835, + "eval_samples_per_second": 440.885, + "eval_steps_per_second": 3.544, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0004643243699213263, + "loss": 0.7855, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0004843958405482807, + "loss": 0.7703, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8852549889135255, + "eval_f1": 0.8614659670499215, + "eval_loss": 0.8643367886543274, + "eval_precision": 0.8726602035589426, + "eval_recall": 0.8852549889135255, + "eval_runtime": 7.7778, + "eval_samples_per_second": 463.887, + "eval_steps_per_second": 3.729, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0004827145775004345, + "loss": 0.7767, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8944013303769401, + "eval_f1": 0.8834439300574406, + "eval_loss": 0.8428008556365967, + "eval_precision": 0.8849320439038928, + "eval_recall": 0.8944013303769401, + "eval_runtime": 8.1179, + "eval_samples_per_second": 444.449, + "eval_steps_per_second": 3.572, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.000479633380610365, + "loss": 0.7733, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00047517019669463965, + "loss": 0.7619, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8996674057649667, + "eval_f1": 0.8927958323955955, + "eval_loss": 0.8312403559684753, + "eval_precision": 0.8906566133102257, + "eval_recall": 0.8996674057649667, + "eval_runtime": 7.7014, + "eval_samples_per_second": 468.486, + "eval_steps_per_second": 3.766, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00046935102212606407, + "loss": 0.7554, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00046220975141455806, + "loss": 0.7507, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8725055432372506, + "eval_f1": 0.8454792916401634, + "eval_loss": 0.8923548460006714, + "eval_precision": 0.8612537336253757, + "eval_recall": 0.8725055432372506, + "eval_runtime": 8.0681, + "eval_samples_per_second": 447.195, + "eval_steps_per_second": 3.594, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00045378797978434435, + "loss": 0.7448, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8048780487804879, + "eval_f1": 0.8249545211644205, + "eval_loss": 0.983521044254303, + "eval_precision": 0.8790619429557502, + "eval_recall": 0.8048780487804879, + "eval_runtime": 7.6324, + "eval_samples_per_second": 472.723, + "eval_steps_per_second": 3.8, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0004441347608973642, + "loss": 0.7511, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0004333063211340891, + "loss": 0.7459, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8827605321507761, + "eval_f1": 0.8566394005949085, + "eval_loss": 0.8948411345481873, + "eval_precision": 0.8774264627923165, + "eval_recall": 0.8827605321507761, + "eval_runtime": 8.1961, + "eval_samples_per_second": 440.209, + "eval_steps_per_second": 3.538, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0004213657320959374, + "loss": 0.7383, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8827605321507761, + "eval_f1": 0.8827117209026983, + "eval_loss": 0.8575074672698975, + "eval_precision": 0.8942569435685355, + "eval_recall": 0.8827605321507761, + "eval_runtime": 7.8909, + "eval_samples_per_second": 457.238, + "eval_steps_per_second": 3.675, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00040838254323684504, + "loss": 0.7473, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00039443237676377454, + "loss": 0.7292, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8941241685144125, + "eval_f1": 0.8794521418624451, + "eval_loss": 0.8463320136070251, + "eval_precision": 0.8804807767639773, + "eval_recall": 0.8941241685144125, + "eval_runtime": 8.1611, + "eval_samples_per_second": 442.1, + "eval_steps_per_second": 3.553, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0003795964871657146, + "loss": 0.7221, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00036396128793674844, + "loss": 0.7229, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8968957871396895, + "eval_f1": 0.8911383656156205, + "eval_loss": 0.8335779905319214, + "eval_precision": 0.8945757811265731, + "eval_recall": 0.8968957871396895, + "eval_runtime": 7.942, + "eval_samples_per_second": 454.291, + "eval_steps_per_second": 3.651, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0003476178482498523, + "loss": 0.7222, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8957871396895787, + "eval_f1": 0.8929986476842874, + "eval_loss": 0.8421359062194824, + "eval_precision": 0.8953021009564096, + "eval_recall": 0.8957871396895787, + "eval_runtime": 7.9365, + "eval_samples_per_second": 454.607, + "eval_steps_per_second": 3.654, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0003306613625131103, + "loss": 0.7241, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003131905958979803, + "loss": 0.7201, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.8957191056937194, + "eval_loss": 0.832567572593689, + "eval_precision": 0.8912777315741526, + "eval_recall": 0.9024390243902439, + "eval_runtime": 7.8352, + "eval_samples_per_second": 460.488, + "eval_steps_per_second": 3.701, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0002953073090692036, + "loss": 0.71, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.0002771156654670887, + "loss": 0.7168, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.88470066518847, + "eval_f1": 0.8881462000157886, + "eval_loss": 0.8582449555397034, + "eval_precision": 0.8971648552876966, + "eval_recall": 0.88470066518847, + "eval_runtime": 8.0681, + "eval_samples_per_second": 447.193, + "eval_steps_per_second": 3.594, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0002587216245945278, + "loss": 0.7118, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.9058404364624941, + "eval_loss": 0.8176126480102539, + "eval_precision": 0.906813179284527, + "eval_recall": 0.9113082039911308, + "eval_runtime": 7.9958, + "eval_samples_per_second": 451.237, + "eval_steps_per_second": 3.627, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0002402323248426235, + "loss": 0.7059, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00022175545944973227, + "loss": 0.7002, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.908626478246406, + "eval_loss": 0.815597653388977, + "eval_precision": 0.9056271259755384, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.5288, + "eval_samples_per_second": 479.225, + "eval_steps_per_second": 3.852, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.0002033986492287318, + "loss": 0.6983, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00018526881571613905, + "loss": 0.7036, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9037582999898407, + "eval_loss": 0.8201684355735779, + "eval_precision": 0.9015947243817638, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.7273, + "eval_samples_per_second": 466.915, + "eval_steps_per_second": 3.753, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.00016747155839425112, + "loss": 0.6951, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8952328159645233, + "eval_f1": 0.8940505868264133, + "eval_loss": 0.8415734171867371, + "eval_precision": 0.8935207842848221, + "eval_recall": 0.8952328159645233, + "eval_runtime": 7.5407, + "eval_samples_per_second": 478.47, + "eval_steps_per_second": 3.846, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00015011053961375606, + "loss": 0.6988, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.00013328688079940933, + "loss": 0.695, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9091880600421776, + "eval_loss": 0.8160383701324463, + "eval_precision": 0.9076620612546528, + "eval_recall": 0.9115853658536586, + "eval_runtime": 7.8573, + "eval_samples_per_second": 459.192, + "eval_steps_per_second": 3.691, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.0001170985734556503, + "loss": 0.6935, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9104122390843779, + "eval_loss": 0.8160849809646606, + "eval_precision": 0.9089225975530801, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.2326, + "eval_samples_per_second": 438.259, + "eval_steps_per_second": 3.523, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00010163990840283216, + "loss": 0.6933, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 8.700092656854799e-05, + "loss": 0.6921, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9107477400836806, + "eval_loss": 0.8119679093360901, + "eval_precision": 0.9096693795294111, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.8562, + "eval_samples_per_second": 459.256, + "eval_steps_per_second": 3.691, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 7.326689453298898e-05, + "loss": 0.6897, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 6.051780788308651e-05, + "loss": 0.6881, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9109843298213895, + "eval_loss": 0.8124634623527527, + "eval_precision": 0.909076880697265, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.8742, + "eval_samples_per_second": 458.203, + "eval_steps_per_second": 3.683, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 4.882792526821298e-05, + "loss": 0.6883, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9098877591082808, + "eval_loss": 0.8138753175735474, + "eval_precision": 0.9076469413117539, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.8958, + "eval_samples_per_second": 456.952, + "eval_steps_per_second": 3.673, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 3.826533587139282e-05, + "loss": 0.6861, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 2.8891562815342377e-05, + "loss": 0.6867, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9143099300203761, + "eval_loss": 0.8139218091964722, + "eval_precision": 0.912031031283455, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.9482, + "eval_samples_per_second": 453.939, + "eval_steps_per_second": 3.649, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 2.0761204813348393e-05, + "loss": 0.6864, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 1.392161815223514e-05, + "loss": 0.6847, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9143650745105159, + "eval_loss": 0.8107795715332031, + "eval_precision": 0.9123235932761957, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.0251, + "eval_samples_per_second": 449.587, + "eval_steps_per_second": 3.614, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 8.412640859750557e-06, + "loss": 0.6867, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9090266682470386, + "eval_loss": 0.8108495473861694, + "eval_precision": 0.9060040264473824, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.2713, + "eval_samples_per_second": 436.206, + "eval_steps_per_second": 3.506, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 4.26636066299203e-06, + "loss": 0.6885, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.5069280894275435e-06, + "loss": 0.6857, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9127408065192087, + "eval_loss": 0.8093183636665344, + "eval_precision": 0.9106024198845617, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.0208, + "eval_samples_per_second": 449.832, + "eval_steps_per_second": 3.616, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.12100926759305336, + "learning_rate": 0.0004847342323354506, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-p5tu6jza/checkpoint-616/training_args.bin b/run-p5tu6jza/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ab6e77c80a4405d07ee5e8bd7cc53aa1620b2dda --- /dev/null +++ b/run-p5tu6jza/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c83177c875345e50518c62f620b5395f703cb0d338e9b51f1ecab84bc690768 +size 4792 diff --git a/run-p5tu6jza/checkpoint-630/model.safetensors b/run-p5tu6jza/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..28087339fa69c109b9f39d0784395fb0632138a6 --- /dev/null +++ b/run-p5tu6jza/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:678504259a15478030f8749743467ad12450e1280e824fa199e0e6d48d62b4a6 +size 198025308 diff --git a/run-p5tu6jza/checkpoint-630/optimizer.pt b/run-p5tu6jza/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d0540e29fd5befa86ded1f95383b80ca00ba70e --- /dev/null +++ b/run-p5tu6jza/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee12db7668839060067f39b5bc716506086bc9fb007be7ced1cb5803d4fb95fb +size 395900602 diff --git a/run-p5tu6jza/checkpoint-630/rng_state.pth b/run-p5tu6jza/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-p5tu6jza/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-p5tu6jza/checkpoint-630/scheduler.pt b/run-p5tu6jza/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..66c267184b49e6b940118104907ff9782b3fc3d5 --- /dev/null +++ b/run-p5tu6jza/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb89e33080ca30285919bf14d3907b1704d68c7d706dae804a33e235e7abd40e +size 1064 diff --git a/run-p5tu6jza/checkpoint-630/trainer_state.json b/run-p5tu6jza/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..85bf4b43b867c882c85accc920566485007a3756 --- /dev/null +++ b/run-p5tu6jza/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9152344908750255, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-p5tu6jza/checkpoint-630", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.633205284590377e-05, + "loss": 1.3188, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.1513012647628784, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.8688, + "eval_samples_per_second": 458.523, + "eval_steps_per_second": 3.685, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00013266410569180755, + "loss": 0.9628, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00019899615853771128, + "loss": 0.8381, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8841463414634146, + "eval_f1": 0.8854543675055913, + "eval_loss": 0.8711126446723938, + "eval_precision": 0.8950059921865504, + "eval_recall": 0.8841463414634146, + "eval_runtime": 7.9882, + "eval_samples_per_second": 451.665, + "eval_steps_per_second": 3.63, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.0002653282113836151, + "loss": 0.7974, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.9033291552565315, + "eval_loss": 0.8077923655509949, + "eval_precision": 0.9041433894860506, + "eval_recall": 0.9093680709534369, + "eval_runtime": 8.0908, + "eval_samples_per_second": 445.936, + "eval_steps_per_second": 3.584, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003316602642295188, + "loss": 0.7923, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00039799231707542257, + "loss": 0.7769, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.88332540628377, + "eval_loss": 0.825872004032135, + "eval_precision": 0.888254096302808, + "eval_recall": 0.8991130820399114, + "eval_runtime": 8.1835, + "eval_samples_per_second": 440.885, + "eval_steps_per_second": 3.544, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0004643243699213263, + "loss": 0.7855, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0004843958405482807, + "loss": 0.7703, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8852549889135255, + "eval_f1": 0.8614659670499215, + "eval_loss": 0.8643367886543274, + "eval_precision": 0.8726602035589426, + "eval_recall": 0.8852549889135255, + "eval_runtime": 7.7778, + "eval_samples_per_second": 463.887, + "eval_steps_per_second": 3.729, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0004827145775004345, + "loss": 0.7767, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8944013303769401, + "eval_f1": 0.8834439300574406, + "eval_loss": 0.8428008556365967, + "eval_precision": 0.8849320439038928, + "eval_recall": 0.8944013303769401, + "eval_runtime": 8.1179, + "eval_samples_per_second": 444.449, + "eval_steps_per_second": 3.572, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.000479633380610365, + "loss": 0.7733, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00047517019669463965, + "loss": 0.7619, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8996674057649667, + "eval_f1": 0.8927958323955955, + "eval_loss": 0.8312403559684753, + "eval_precision": 0.8906566133102257, + "eval_recall": 0.8996674057649667, + "eval_runtime": 7.7014, + "eval_samples_per_second": 468.486, + "eval_steps_per_second": 3.766, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00046935102212606407, + "loss": 0.7554, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00046220975141455806, + "loss": 0.7507, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8725055432372506, + "eval_f1": 0.8454792916401634, + "eval_loss": 0.8923548460006714, + "eval_precision": 0.8612537336253757, + "eval_recall": 0.8725055432372506, + "eval_runtime": 8.0681, + "eval_samples_per_second": 447.195, + "eval_steps_per_second": 3.594, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00045378797978434435, + "loss": 0.7448, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8048780487804879, + "eval_f1": 0.8249545211644205, + "eval_loss": 0.983521044254303, + "eval_precision": 0.8790619429557502, + "eval_recall": 0.8048780487804879, + "eval_runtime": 7.6324, + "eval_samples_per_second": 472.723, + "eval_steps_per_second": 3.8, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0004441347608973642, + "loss": 0.7511, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0004333063211340891, + "loss": 0.7459, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8827605321507761, + "eval_f1": 0.8566394005949085, + "eval_loss": 0.8948411345481873, + "eval_precision": 0.8774264627923165, + "eval_recall": 0.8827605321507761, + "eval_runtime": 8.1961, + "eval_samples_per_second": 440.209, + "eval_steps_per_second": 3.538, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0004213657320959374, + "loss": 0.7383, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8827605321507761, + "eval_f1": 0.8827117209026983, + "eval_loss": 0.8575074672698975, + "eval_precision": 0.8942569435685355, + "eval_recall": 0.8827605321507761, + "eval_runtime": 7.8909, + "eval_samples_per_second": 457.238, + "eval_steps_per_second": 3.675, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00040838254323684504, + "loss": 0.7473, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00039443237676377454, + "loss": 0.7292, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8941241685144125, + "eval_f1": 0.8794521418624451, + "eval_loss": 0.8463320136070251, + "eval_precision": 0.8804807767639773, + "eval_recall": 0.8941241685144125, + "eval_runtime": 8.1611, + "eval_samples_per_second": 442.1, + "eval_steps_per_second": 3.553, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0003795964871657146, + "loss": 0.7221, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00036396128793674844, + "loss": 0.7229, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8968957871396895, + "eval_f1": 0.8911383656156205, + "eval_loss": 0.8335779905319214, + "eval_precision": 0.8945757811265731, + "eval_recall": 0.8968957871396895, + "eval_runtime": 7.942, + "eval_samples_per_second": 454.291, + "eval_steps_per_second": 3.651, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0003476178482498523, + "loss": 0.7222, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8957871396895787, + "eval_f1": 0.8929986476842874, + "eval_loss": 0.8421359062194824, + "eval_precision": 0.8953021009564096, + "eval_recall": 0.8957871396895787, + "eval_runtime": 7.9365, + "eval_samples_per_second": 454.607, + "eval_steps_per_second": 3.654, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0003306613625131103, + "loss": 0.7241, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003131905958979803, + "loss": 0.7201, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.8957191056937194, + "eval_loss": 0.832567572593689, + "eval_precision": 0.8912777315741526, + "eval_recall": 0.9024390243902439, + "eval_runtime": 7.8352, + "eval_samples_per_second": 460.488, + "eval_steps_per_second": 3.701, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0002953073090692036, + "loss": 0.71, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.0002771156654670887, + "loss": 0.7168, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.88470066518847, + "eval_f1": 0.8881462000157886, + "eval_loss": 0.8582449555397034, + "eval_precision": 0.8971648552876966, + "eval_recall": 0.88470066518847, + "eval_runtime": 8.0681, + "eval_samples_per_second": 447.193, + "eval_steps_per_second": 3.594, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0002587216245945278, + "loss": 0.7118, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.9058404364624941, + "eval_loss": 0.8176126480102539, + "eval_precision": 0.906813179284527, + "eval_recall": 0.9113082039911308, + "eval_runtime": 7.9958, + "eval_samples_per_second": 451.237, + "eval_steps_per_second": 3.627, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0002402323248426235, + "loss": 0.7059, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00022175545944973227, + "loss": 0.7002, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.908626478246406, + "eval_loss": 0.815597653388977, + "eval_precision": 0.9056271259755384, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.5288, + "eval_samples_per_second": 479.225, + "eval_steps_per_second": 3.852, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.0002033986492287318, + "loss": 0.6983, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00018526881571613905, + "loss": 0.7036, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9037582999898407, + "eval_loss": 0.8201684355735779, + "eval_precision": 0.9015947243817638, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.7273, + "eval_samples_per_second": 466.915, + "eval_steps_per_second": 3.753, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.00016747155839425112, + "loss": 0.6951, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8952328159645233, + "eval_f1": 0.8940505868264133, + "eval_loss": 0.8415734171867371, + "eval_precision": 0.8935207842848221, + "eval_recall": 0.8952328159645233, + "eval_runtime": 7.5407, + "eval_samples_per_second": 478.47, + "eval_steps_per_second": 3.846, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00015011053961375606, + "loss": 0.6988, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.00013328688079940933, + "loss": 0.695, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9091880600421776, + "eval_loss": 0.8160383701324463, + "eval_precision": 0.9076620612546528, + "eval_recall": 0.9115853658536586, + "eval_runtime": 7.8573, + "eval_samples_per_second": 459.192, + "eval_steps_per_second": 3.691, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.0001170985734556503, + "loss": 0.6935, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9104122390843779, + "eval_loss": 0.8160849809646606, + "eval_precision": 0.9089225975530801, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.2326, + "eval_samples_per_second": 438.259, + "eval_steps_per_second": 3.523, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00010163990840283216, + "loss": 0.6933, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 8.700092656854799e-05, + "loss": 0.6921, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9107477400836806, + "eval_loss": 0.8119679093360901, + "eval_precision": 0.9096693795294111, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.8562, + "eval_samples_per_second": 459.256, + "eval_steps_per_second": 3.691, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 7.326689453298898e-05, + "loss": 0.6897, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 6.051780788308651e-05, + "loss": 0.6881, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9109843298213895, + "eval_loss": 0.8124634623527527, + "eval_precision": 0.909076880697265, + "eval_recall": 0.9143569844789357, + "eval_runtime": 7.8742, + "eval_samples_per_second": 458.203, + "eval_steps_per_second": 3.683, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 4.882792526821298e-05, + "loss": 0.6883, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9098877591082808, + "eval_loss": 0.8138753175735474, + "eval_precision": 0.9076469413117539, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.8958, + "eval_samples_per_second": 456.952, + "eval_steps_per_second": 3.673, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 3.826533587139282e-05, + "loss": 0.6861, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 2.8891562815342377e-05, + "loss": 0.6867, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9143099300203761, + "eval_loss": 0.8139218091964722, + "eval_precision": 0.912031031283455, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.9482, + "eval_samples_per_second": 453.939, + "eval_steps_per_second": 3.649, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 2.0761204813348393e-05, + "loss": 0.6864, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 1.392161815223514e-05, + "loss": 0.6847, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9143650745105159, + "eval_loss": 0.8107795715332031, + "eval_precision": 0.9123235932761957, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.0251, + "eval_samples_per_second": 449.587, + "eval_steps_per_second": 3.614, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 8.412640859750557e-06, + "loss": 0.6867, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9090266682470386, + "eval_loss": 0.8108495473861694, + "eval_precision": 0.9060040264473824, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.2713, + "eval_samples_per_second": 436.206, + "eval_steps_per_second": 3.506, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 4.26636066299203e-06, + "loss": 0.6885, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.5069280894275435e-06, + "loss": 0.6857, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9127408065192087, + "eval_loss": 0.8093183636665344, + "eval_precision": 0.9106024198845617, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.0208, + "eval_samples_per_second": 449.832, + "eval_steps_per_second": 3.616, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 1.504157991287053e-07, + "loss": 0.6824, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9152344908750255, + "eval_loss": 0.8106203079223633, + "eval_precision": 0.9176788165803926, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.6125, + "eval_samples_per_second": 473.96, + "eval_steps_per_second": 3.81, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.12100926759305336, + "learning_rate": 0.0004847342323354506, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-p5tu6jza/checkpoint-630/training_args.bin b/run-p5tu6jza/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ab6e77c80a4405d07ee5e8bd7cc53aa1620b2dda --- /dev/null +++ b/run-p5tu6jza/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c83177c875345e50518c62f620b5395f703cb0d338e9b51f1ecab84bc690768 +size 4792 diff --git a/run-p91fgrnn/checkpoint-616/model.safetensors b/run-p91fgrnn/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..801cc7be9c720ac57d0ef22a168246486b9252cc --- /dev/null +++ b/run-p91fgrnn/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88583e7b8d4bcd8d11ad0757a17ba0166026612ca22bd94f7034653e66d8f5d4 +size 198025308 diff --git a/run-p91fgrnn/checkpoint-616/optimizer.pt b/run-p91fgrnn/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..19d9cbc85e17419293a28cc7045cfea04590692b --- /dev/null +++ b/run-p91fgrnn/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df15ef218e0759b9f2db8c2734f4478af0cb377cfeb504726cd9542ffa8f4d0a +size 395900602 diff --git a/run-p91fgrnn/checkpoint-616/rng_state.pth b/run-p91fgrnn/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-p91fgrnn/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-p91fgrnn/checkpoint-616/scheduler.pt b/run-p91fgrnn/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7489f1bfe8e21280ed8bd7f6f855782ec6c54e5a --- /dev/null +++ b/run-p91fgrnn/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77de87f737ce0a469760dd60c9e0d50204a6a0dd5596fccb947cd9dfa86971cf +size 1064 diff --git a/run-p91fgrnn/checkpoint-616/trainer_state.json b/run-p91fgrnn/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3eeeee1b148011e920334e2f8aefe255e8eac6c8 --- /dev/null +++ b/run-p91fgrnn/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9185705459185106, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-p91fgrnn/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.9105388504383195e-06, + "loss": 1.5073, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.815410199556541, + "eval_f1": 0.7486920576731614, + "eval_loss": 1.2670131921768188, + "eval_precision": 0.704827027051429, + "eval_recall": 0.815410199556541, + "eval_runtime": 8.2004, + "eval_samples_per_second": 439.977, + "eval_steps_per_second": 3.536, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 1.3821077700876639e-05, + "loss": 1.3216, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 2.0731616551314962e-05, + "loss": 1.0316, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.7513704434640557, + "eval_loss": 0.9548211693763733, + "eval_precision": 0.7699235349840203, + "eval_recall": 0.8287139689578714, + "eval_runtime": 8.284, + "eval_samples_per_second": 435.54, + "eval_steps_per_second": 3.501, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 2.7642155401753278e-05, + "loss": 0.9243, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8824833702882483, + "eval_f1": 0.8621423519793057, + "eval_loss": 0.8905470371246338, + "eval_precision": 0.8758494903520432, + "eval_recall": 0.8824833702882483, + "eval_runtime": 8.4437, + "eval_samples_per_second": 427.299, + "eval_steps_per_second": 3.435, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 3.455269425219161e-05, + "loss": 0.8825, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 4.1463233102629924e-05, + "loss": 0.8397, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.904379157427938, + "eval_f1": 0.8956998636501963, + "eval_loss": 0.8327401876449585, + "eval_precision": 0.8949211211857516, + "eval_recall": 0.904379157427938, + "eval_runtime": 8.3908, + "eval_samples_per_second": 429.992, + "eval_steps_per_second": 3.456, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 4.837377195306824e-05, + "loss": 0.824, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 5.5284310803506556e-05, + "loss": 0.8003, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8932926829268293, + "eval_f1": 0.8932600056665638, + "eval_loss": 0.8337185382843018, + "eval_precision": 0.8957161161110696, + "eval_recall": 0.8932926829268293, + "eval_runtime": 8.507, + "eval_samples_per_second": 424.12, + "eval_steps_per_second": 3.409, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 6.219484965394488e-05, + "loss": 0.7811, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9059553629738384, + "eval_loss": 0.8019067645072937, + "eval_precision": 0.90440660401566, + "eval_recall": 0.914079822616408, + "eval_runtime": 8.2166, + "eval_samples_per_second": 439.112, + "eval_steps_per_second": 3.529, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 6.696865970800966e-05, + "loss": 0.7839, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 6.679121972084008e-05, + "loss": 0.777, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9071825262897594, + "eval_loss": 0.8005056381225586, + "eval_precision": 0.9067434470810737, + "eval_recall": 0.9157427937915743, + "eval_runtime": 8.2354, + "eval_samples_per_second": 438.107, + "eval_steps_per_second": 3.521, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 6.639522816322099e-05, + "loss": 0.771, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 6.578328383986882e-05, + "loss": 0.7655, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.900600132750172, + "eval_loss": 0.8209041953086853, + "eval_precision": 0.9008443891603041, + "eval_recall": 0.9024390243902439, + "eval_runtime": 8.2826, + "eval_samples_per_second": 435.615, + "eval_steps_per_second": 3.501, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 6.495940280557856e-05, + "loss": 0.7616, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9022417028235742, + "eval_loss": 0.8110632300376892, + "eval_precision": 0.9025154746795645, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.2068, + "eval_samples_per_second": 439.637, + "eval_steps_per_second": 3.534, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 6.392899200874726e-05, + "loss": 0.7564, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 6.269881380677161e-05, + "loss": 0.749, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9049334811529933, + "eval_f1": 0.9032223260851389, + "eval_loss": 0.8169792890548706, + "eval_precision": 0.9020249865257072, + "eval_recall": 0.9049334811529933, + "eval_runtime": 8.2936, + "eval_samples_per_second": 435.033, + "eval_steps_per_second": 3.497, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 6.127694158619703e-05, + "loss": 0.7452, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9099349793656997, + "eval_loss": 0.8063834309577942, + "eval_precision": 0.9080914201567512, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.883, + "eval_samples_per_second": 457.692, + "eval_steps_per_second": 3.679, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 5.967270677887338e-05, + "loss": 0.7443, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 5.7896637621838435e-05, + "loss": 0.737, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9056110410075422, + "eval_loss": 0.8034456968307495, + "eval_precision": 0.9028396693589551, + "eval_recall": 0.914079822616408, + "eval_runtime": 7.812, + "eval_samples_per_second": 461.855, + "eval_steps_per_second": 3.712, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 5.596039006283467e-05, + "loss": 0.7423, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 5.3876671264910794e-05, + "loss": 0.7319, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.9063959930935112, + "eval_loss": 0.8177195191383362, + "eval_precision": 0.9083911129435112, + "eval_recall": 0.9065964523281597, + "eval_runtime": 7.9249, + "eval_samples_per_second": 455.273, + "eval_steps_per_second": 3.659, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 5.1659156212130546e-05, + "loss": 0.7401, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9081079239228947, + "eval_loss": 0.806350588798523, + "eval_precision": 0.9042765988220678, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.3731, + "eval_samples_per_second": 430.906, + "eval_steps_per_second": 3.463, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 4.9322397963687e-05, + "loss": 0.7255, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 4.68817321454046e-05, + "loss": 0.7277, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9128989613960526, + "eval_loss": 0.7965473532676697, + "eval_precision": 0.9106034836010759, + "eval_recall": 0.9185144124168514, + "eval_runtime": 8.3265, + "eval_samples_per_second": 433.316, + "eval_steps_per_second": 3.483, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 4.435317630543008e-05, + "loss": 0.7258, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 4.175332479461843e-05, + "loss": 0.7265, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9055562530216602, + "eval_loss": 0.8128659129142761, + "eval_precision": 0.907649057135454, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.4771, + "eval_samples_per_second": 425.619, + "eval_steps_per_second": 3.421, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 3.909923986149089e-05, + "loss": 0.7222, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9027161862527716, + "eval_f1": 0.902754954259051, + "eval_loss": 0.8199866414070129, + "eval_precision": 0.9043215005534959, + "eval_recall": 0.9027161862527716, + "eval_runtime": 8.1338, + "eval_samples_per_second": 443.582, + "eval_steps_per_second": 3.565, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 3.640833967648413e-05, + "loss": 0.7207, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 3.369828402036309e-05, + "loss": 0.7204, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9085365853658537, + "eval_f1": 0.907197118162757, + "eval_loss": 0.8118705153465271, + "eval_precision": 0.9063808841481296, + "eval_recall": 0.9085365853658537, + "eval_runtime": 7.8943, + "eval_samples_per_second": 457.039, + "eval_steps_per_second": 3.674, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 3.098685838699877e-05, + "loss": 0.7194, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 2.829185726111878e-05, + "loss": 0.7143, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.911041174694409, + "eval_loss": 0.8027032613754272, + "eval_precision": 0.9094616258294882, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.184, + "eval_samples_per_second": 440.862, + "eval_steps_per_second": 3.544, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 2.5630967337052975e-05, + "loss": 0.7187, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9042854366009004, + "eval_loss": 0.8085971474647522, + "eval_precision": 0.901896748440653, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.2101, + "eval_samples_per_second": 439.46, + "eval_steps_per_second": 3.532, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 2.302165144488342e-05, + "loss": 0.7117, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 2.0481033945765906e-05, + "loss": 0.7081, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9083319597206068, + "eval_loss": 0.8065605759620667, + "eval_precision": 0.9056325993591545, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.0563, + "eval_samples_per_second": 447.847, + "eval_steps_per_second": 3.6, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 1.8025788348547763e-05, + "loss": 0.7138, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9096773009094383, + "eval_loss": 0.8061693906784058, + "eval_precision": 0.9075670590059649, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.8083, + "eval_samples_per_second": 462.073, + "eval_steps_per_second": 3.714, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 1.5672027885229385e-05, + "loss": 0.7107, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 1.3435199763398414e-05, + "loss": 0.7087, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9087730786228176, + "eval_loss": 0.8075631856918335, + "eval_precision": 0.9066144314129545, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.9424, + "eval_samples_per_second": 454.27, + "eval_steps_per_second": 3.651, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.1329983789634266e-05, + "loss": 0.7087, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 9.37019602919518e-06, + "loss": 0.7068, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9170402176778887, + "eval_loss": 0.7964470982551575, + "eval_precision": 0.9151931032392132, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.0454, + "eval_samples_per_second": 448.457, + "eval_steps_per_second": 3.605, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 7.568698134247985e-06, + "loss": 0.7066, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9124186833457258, + "eval_loss": 0.8047044277191162, + "eval_precision": 0.9107977046392999, + "eval_recall": 0.9196230598669624, + "eval_runtime": 8.1688, + "eval_samples_per_second": 441.68, + "eval_steps_per_second": 3.55, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 5.9373129356990545e-06, + "loss": 0.7075, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 4.486746852578743e-06, + "loss": 0.7026, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.914140041964139, + "eval_loss": 0.7984603047370911, + "eval_precision": 0.9140918027445486, + "eval_recall": 0.9196230598669624, + "eval_runtime": 8.0797, + "eval_samples_per_second": 446.552, + "eval_steps_per_second": 3.589, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 3.2265196281892085e-06, + "loss": 0.7055, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 2.164901854141645e-06, + "loss": 0.7019, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9144409111861452, + "eval_loss": 0.8031311631202698, + "eval_precision": 0.9138079057040017, + "eval_recall": 0.9190687361419069, + "eval_runtime": 8.1843, + "eval_samples_per_second": 440.842, + "eval_steps_per_second": 3.543, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.3088606922989599e-06, + "loss": 0.7063, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.914435427994602, + "eval_loss": 0.8017306923866272, + "eval_precision": 0.9129132944696909, + "eval_recall": 0.9185144124168514, + "eval_runtime": 8.2414, + "eval_samples_per_second": 437.792, + "eval_steps_per_second": 3.519, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 6.640141508385441e-07, + "loss": 0.7056, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 2.3459421451132488e-07, + "loss": 0.7066, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9240576496674058, + "eval_f1": 0.9185705459185106, + "eval_loss": 0.797074019908905, + "eval_precision": 0.9152682001036645, + "eval_recall": 0.9240576496674058, + "eval_runtime": 8.1531, + "eval_samples_per_second": 442.53, + "eval_steps_per_second": 3.557, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.152017037420797, + "learning_rate": 6.697906885809449e-05, + "metric": "eval/loss", + "weight_decay": 0.18016151124851096 + } +} diff --git a/run-p91fgrnn/checkpoint-616/training_args.bin b/run-p91fgrnn/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2860e270d94462c5f686fdcd977efaef603fa9bf --- /dev/null +++ b/run-p91fgrnn/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2ace97c907025df3a7299b6a57ed661a5d575c489bcf8d50b8c84a74bbddd53 +size 4792 diff --git a/run-p91fgrnn/checkpoint-630/model.safetensors b/run-p91fgrnn/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..74988e390dbf989bc408f713b18eb0efd5cf3c50 --- /dev/null +++ b/run-p91fgrnn/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d88fef1b961977dbd68bff5f9dc7684eccf81e7a6bfc6bc6338394f254980da0 +size 198025308 diff --git a/run-p91fgrnn/checkpoint-630/optimizer.pt b/run-p91fgrnn/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8587b10b2f48a7cbb28c9bafaea7b2cb48a5872a --- /dev/null +++ b/run-p91fgrnn/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a65a3cfe22cbdb42768c41747b274f15107aac5ca93492105aa518e72c0ec94b +size 395900602 diff --git a/run-p91fgrnn/checkpoint-630/rng_state.pth b/run-p91fgrnn/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-p91fgrnn/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-p91fgrnn/checkpoint-630/scheduler.pt b/run-p91fgrnn/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..151aeaed3383932b11adcfef34e0bdffd620819b --- /dev/null +++ b/run-p91fgrnn/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32c1929e02bb573380f3ac22941cccba4eb54fe43b604204d98ebcffff8db2f7 +size 1064 diff --git a/run-p91fgrnn/checkpoint-630/trainer_state.json b/run-p91fgrnn/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0ec18993cc3b63299f385fcdac06548e1cb4df81 --- /dev/null +++ b/run-p91fgrnn/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9185705459185106, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-p91fgrnn/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.9105388504383195e-06, + "loss": 1.5073, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.815410199556541, + "eval_f1": 0.7486920576731614, + "eval_loss": 1.2670131921768188, + "eval_precision": 0.704827027051429, + "eval_recall": 0.815410199556541, + "eval_runtime": 8.2004, + "eval_samples_per_second": 439.977, + "eval_steps_per_second": 3.536, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 1.3821077700876639e-05, + "loss": 1.3216, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 2.0731616551314962e-05, + "loss": 1.0316, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.7513704434640557, + "eval_loss": 0.9548211693763733, + "eval_precision": 0.7699235349840203, + "eval_recall": 0.8287139689578714, + "eval_runtime": 8.284, + "eval_samples_per_second": 435.54, + "eval_steps_per_second": 3.501, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 2.7642155401753278e-05, + "loss": 0.9243, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8824833702882483, + "eval_f1": 0.8621423519793057, + "eval_loss": 0.8905470371246338, + "eval_precision": 0.8758494903520432, + "eval_recall": 0.8824833702882483, + "eval_runtime": 8.4437, + "eval_samples_per_second": 427.299, + "eval_steps_per_second": 3.435, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 3.455269425219161e-05, + "loss": 0.8825, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 4.1463233102629924e-05, + "loss": 0.8397, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.904379157427938, + "eval_f1": 0.8956998636501963, + "eval_loss": 0.8327401876449585, + "eval_precision": 0.8949211211857516, + "eval_recall": 0.904379157427938, + "eval_runtime": 8.3908, + "eval_samples_per_second": 429.992, + "eval_steps_per_second": 3.456, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 4.837377195306824e-05, + "loss": 0.824, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 5.5284310803506556e-05, + "loss": 0.8003, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8932926829268293, + "eval_f1": 0.8932600056665638, + "eval_loss": 0.8337185382843018, + "eval_precision": 0.8957161161110696, + "eval_recall": 0.8932926829268293, + "eval_runtime": 8.507, + "eval_samples_per_second": 424.12, + "eval_steps_per_second": 3.409, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 6.219484965394488e-05, + "loss": 0.7811, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9059553629738384, + "eval_loss": 0.8019067645072937, + "eval_precision": 0.90440660401566, + "eval_recall": 0.914079822616408, + "eval_runtime": 8.2166, + "eval_samples_per_second": 439.112, + "eval_steps_per_second": 3.529, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 6.696865970800966e-05, + "loss": 0.7839, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 6.679121972084008e-05, + "loss": 0.777, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9071825262897594, + "eval_loss": 0.8005056381225586, + "eval_precision": 0.9067434470810737, + "eval_recall": 0.9157427937915743, + "eval_runtime": 8.2354, + "eval_samples_per_second": 438.107, + "eval_steps_per_second": 3.521, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 6.639522816322099e-05, + "loss": 0.771, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 6.578328383986882e-05, + "loss": 0.7655, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.900600132750172, + "eval_loss": 0.8209041953086853, + "eval_precision": 0.9008443891603041, + "eval_recall": 0.9024390243902439, + "eval_runtime": 8.2826, + "eval_samples_per_second": 435.615, + "eval_steps_per_second": 3.501, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 6.495940280557856e-05, + "loss": 0.7616, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9022417028235742, + "eval_loss": 0.8110632300376892, + "eval_precision": 0.9025154746795645, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.2068, + "eval_samples_per_second": 439.637, + "eval_steps_per_second": 3.534, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 6.392899200874726e-05, + "loss": 0.7564, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 6.269881380677161e-05, + "loss": 0.749, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9049334811529933, + "eval_f1": 0.9032223260851389, + "eval_loss": 0.8169792890548706, + "eval_precision": 0.9020249865257072, + "eval_recall": 0.9049334811529933, + "eval_runtime": 8.2936, + "eval_samples_per_second": 435.033, + "eval_steps_per_second": 3.497, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 6.127694158619703e-05, + "loss": 0.7452, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9099349793656997, + "eval_loss": 0.8063834309577942, + "eval_precision": 0.9080914201567512, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.883, + "eval_samples_per_second": 457.692, + "eval_steps_per_second": 3.679, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 5.967270677887338e-05, + "loss": 0.7443, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 5.7896637621838435e-05, + "loss": 0.737, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9056110410075422, + "eval_loss": 0.8034456968307495, + "eval_precision": 0.9028396693589551, + "eval_recall": 0.914079822616408, + "eval_runtime": 7.812, + "eval_samples_per_second": 461.855, + "eval_steps_per_second": 3.712, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 5.596039006283467e-05, + "loss": 0.7423, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 5.3876671264910794e-05, + "loss": 0.7319, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.9063959930935112, + "eval_loss": 0.8177195191383362, + "eval_precision": 0.9083911129435112, + "eval_recall": 0.9065964523281597, + "eval_runtime": 7.9249, + "eval_samples_per_second": 455.273, + "eval_steps_per_second": 3.659, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 5.1659156212130546e-05, + "loss": 0.7401, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9081079239228947, + "eval_loss": 0.806350588798523, + "eval_precision": 0.9042765988220678, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.3731, + "eval_samples_per_second": 430.906, + "eval_steps_per_second": 3.463, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 4.9322397963687e-05, + "loss": 0.7255, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 4.68817321454046e-05, + "loss": 0.7277, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9128989613960526, + "eval_loss": 0.7965473532676697, + "eval_precision": 0.9106034836010759, + "eval_recall": 0.9185144124168514, + "eval_runtime": 8.3265, + "eval_samples_per_second": 433.316, + "eval_steps_per_second": 3.483, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 4.435317630543008e-05, + "loss": 0.7258, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 4.175332479461843e-05, + "loss": 0.7265, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9055562530216602, + "eval_loss": 0.8128659129142761, + "eval_precision": 0.907649057135454, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.4771, + "eval_samples_per_second": 425.619, + "eval_steps_per_second": 3.421, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 3.909923986149089e-05, + "loss": 0.7222, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9027161862527716, + "eval_f1": 0.902754954259051, + "eval_loss": 0.8199866414070129, + "eval_precision": 0.9043215005534959, + "eval_recall": 0.9027161862527716, + "eval_runtime": 8.1338, + "eval_samples_per_second": 443.582, + "eval_steps_per_second": 3.565, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 3.640833967648413e-05, + "loss": 0.7207, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 3.369828402036309e-05, + "loss": 0.7204, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9085365853658537, + "eval_f1": 0.907197118162757, + "eval_loss": 0.8118705153465271, + "eval_precision": 0.9063808841481296, + "eval_recall": 0.9085365853658537, + "eval_runtime": 7.8943, + "eval_samples_per_second": 457.039, + "eval_steps_per_second": 3.674, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 3.098685838699877e-05, + "loss": 0.7194, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 2.829185726111878e-05, + "loss": 0.7143, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.911041174694409, + "eval_loss": 0.8027032613754272, + "eval_precision": 0.9094616258294882, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.184, + "eval_samples_per_second": 440.862, + "eval_steps_per_second": 3.544, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 2.5630967337052975e-05, + "loss": 0.7187, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9042854366009004, + "eval_loss": 0.8085971474647522, + "eval_precision": 0.901896748440653, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.2101, + "eval_samples_per_second": 439.46, + "eval_steps_per_second": 3.532, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 2.302165144488342e-05, + "loss": 0.7117, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 2.0481033945765906e-05, + "loss": 0.7081, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9083319597206068, + "eval_loss": 0.8065605759620667, + "eval_precision": 0.9056325993591545, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.0563, + "eval_samples_per_second": 447.847, + "eval_steps_per_second": 3.6, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 1.8025788348547763e-05, + "loss": 0.7138, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9096773009094383, + "eval_loss": 0.8061693906784058, + "eval_precision": 0.9075670590059649, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.8083, + "eval_samples_per_second": 462.073, + "eval_steps_per_second": 3.714, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 1.5672027885229385e-05, + "loss": 0.7107, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 1.3435199763398414e-05, + "loss": 0.7087, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9087730786228176, + "eval_loss": 0.8075631856918335, + "eval_precision": 0.9066144314129545, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.9424, + "eval_samples_per_second": 454.27, + "eval_steps_per_second": 3.651, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.1329983789634266e-05, + "loss": 0.7087, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 9.37019602919518e-06, + "loss": 0.7068, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9170402176778887, + "eval_loss": 0.7964470982551575, + "eval_precision": 0.9151931032392132, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.0454, + "eval_samples_per_second": 448.457, + "eval_steps_per_second": 3.605, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 7.568698134247985e-06, + "loss": 0.7066, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9124186833457258, + "eval_loss": 0.8047044277191162, + "eval_precision": 0.9107977046392999, + "eval_recall": 0.9196230598669624, + "eval_runtime": 8.1688, + "eval_samples_per_second": 441.68, + "eval_steps_per_second": 3.55, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 5.9373129356990545e-06, + "loss": 0.7075, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 4.486746852578743e-06, + "loss": 0.7026, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.914140041964139, + "eval_loss": 0.7984603047370911, + "eval_precision": 0.9140918027445486, + "eval_recall": 0.9196230598669624, + "eval_runtime": 8.0797, + "eval_samples_per_second": 446.552, + "eval_steps_per_second": 3.589, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 3.2265196281892085e-06, + "loss": 0.7055, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 2.164901854141645e-06, + "loss": 0.7019, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9144409111861452, + "eval_loss": 0.8031311631202698, + "eval_precision": 0.9138079057040017, + "eval_recall": 0.9190687361419069, + "eval_runtime": 8.1843, + "eval_samples_per_second": 440.842, + "eval_steps_per_second": 3.543, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.3088606922989599e-06, + "loss": 0.7063, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.914435427994602, + "eval_loss": 0.8017306923866272, + "eval_precision": 0.9129132944696909, + "eval_recall": 0.9185144124168514, + "eval_runtime": 8.2414, + "eval_samples_per_second": 437.792, + "eval_steps_per_second": 3.519, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 6.640141508385441e-07, + "loss": 0.7056, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 2.3459421451132488e-07, + "loss": 0.7066, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9240576496674058, + "eval_f1": 0.9185705459185106, + "eval_loss": 0.797074019908905, + "eval_precision": 0.9152682001036645, + "eval_recall": 0.9240576496674058, + "eval_runtime": 8.1531, + "eval_samples_per_second": 442.53, + "eval_steps_per_second": 3.557, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 2.3419071064992688e-08, + "loss": 0.7068, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9146438499064431, + "eval_loss": 0.8050375580787659, + "eval_precision": 0.913596753368037, + "eval_recall": 0.9193458980044346, + "eval_runtime": 7.9241, + "eval_samples_per_second": 455.322, + "eval_steps_per_second": 3.66, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.152017037420797, + "learning_rate": 6.697906885809449e-05, + "metric": "eval/loss", + "weight_decay": 0.18016151124851096 + } +} diff --git a/run-p91fgrnn/checkpoint-630/training_args.bin b/run-p91fgrnn/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2860e270d94462c5f686fdcd977efaef603fa9bf --- /dev/null +++ b/run-p91fgrnn/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2ace97c907025df3a7299b6a57ed661a5d575c489bcf8d50b8c84a74bbddd53 +size 4792 diff --git a/run-pacfm46i/checkpoint-616/model.safetensors b/run-pacfm46i/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f6952a99965807734548ee9a6c12f7e4d4a349fd --- /dev/null +++ b/run-pacfm46i/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97aa41093f0238ee763924d9a3cbed3713f2faca37660893e7139a3046216562 +size 198025308 diff --git a/run-pacfm46i/checkpoint-616/optimizer.pt b/run-pacfm46i/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..577cb95e6425f467b4ed4dab08440fbece08e783 --- /dev/null +++ b/run-pacfm46i/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3d733c0f72bacce9a59f10a5bc7ae07f16678f9cd52ceff3b5f063ab975bd0c +size 395900602 diff --git a/run-pacfm46i/checkpoint-616/rng_state.pth b/run-pacfm46i/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-pacfm46i/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-pacfm46i/checkpoint-616/scheduler.pt b/run-pacfm46i/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..37e830a40ea6754d8d070ebf08d8e23ab84762a1 --- /dev/null +++ b/run-pacfm46i/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:babd3842f10f9d8bd4b0cf9af66377fe7036d57cc5887d89d22c39ed4493bbe6 +size 1064 diff --git a/run-pacfm46i/checkpoint-616/trainer_state.json b/run-pacfm46i/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..069de187e49cc1917f7c810c5890fb96441a8b0f --- /dev/null +++ b/run-pacfm46i/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9201760596208628, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-pacfm46i/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.6485915447176403e-05, + "loss": 1.4855, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.0238878726959229, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.2236, + "eval_samples_per_second": 438.739, + "eval_steps_per_second": 3.526, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.2971830894352806e-05, + "loss": 1.1173, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 4.945774634152921e-05, + "loss": 0.9379, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8725055432372506, + "eval_f1": 0.8387750017267637, + "eval_loss": 0.9738784432411194, + "eval_precision": 0.8665422112681861, + "eval_recall": 0.8725055432372506, + "eval_runtime": 7.5772, + "eval_samples_per_second": 476.167, + "eval_steps_per_second": 3.827, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 6.594366178870561e-05, + "loss": 0.8645, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.903345619858151, + "eval_loss": 0.8674681782722473, + "eval_precision": 0.9000064831253841, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.1567, + "eval_samples_per_second": 442.338, + "eval_steps_per_second": 3.555, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 8.242957723588202e-05, + "loss": 0.8267, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 9.891549268305842e-05, + "loss": 0.8022, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9046336795025958, + "eval_loss": 0.8086495995521545, + "eval_precision": 0.9003895532409713, + "eval_recall": 0.9099223946784922, + "eval_runtime": 7.856, + "eval_samples_per_second": 459.268, + "eval_steps_per_second": 3.691, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00011540140813023482, + "loss": 0.7991, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00012038989489432099, + "loss": 0.7766, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9101995565410199, + "eval_f1": 0.907109207244021, + "eval_loss": 0.8067899942398071, + "eval_precision": 0.9071371900569296, + "eval_recall": 0.9101995565410199, + "eval_runtime": 8.1274, + "eval_samples_per_second": 443.932, + "eval_steps_per_second": 3.568, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011997204018815586, + "loss": 0.7786, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9047677672818952, + "eval_loss": 0.8040274977684021, + "eval_precision": 0.9018778999556504, + "eval_recall": 0.9121396895787139, + "eval_runtime": 8.1769, + "eval_samples_per_second": 441.245, + "eval_steps_per_second": 3.547, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00011920625126370037, + "loss": 0.7701, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00011809698855430137, + "loss": 0.7639, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9121614405671676, + "eval_loss": 0.7985339164733887, + "eval_precision": 0.9082048884863358, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.7832, + "eval_samples_per_second": 463.563, + "eval_steps_per_second": 3.726, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00011665071309931488, + "loss": 0.7592, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00011487584891097466, + "loss": 0.7485, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9070702229403478, + "eval_loss": 0.8066944479942322, + "eval_precision": 0.903686494252266, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.9493, + "eval_samples_per_second": 453.877, + "eval_steps_per_second": 3.648, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00011278273390767945, + "loss": 0.7451, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9120216108815606, + "eval_loss": 0.8016820549964905, + "eval_precision": 0.9120991969183748, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.7418, + "eval_samples_per_second": 466.043, + "eval_steps_per_second": 3.746, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0001103835596994947, + "loss": 0.748, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0001076923005765951, + "loss": 0.7395, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9115439470866833, + "eval_loss": 0.8016670942306519, + "eval_precision": 0.9106920644769579, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.3643, + "eval_samples_per_second": 489.931, + "eval_steps_per_second": 3.938, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00010472463211426429, + "loss": 0.732, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8996674057649667, + "eval_f1": 0.9010905799060916, + "eval_loss": 0.8230919241905212, + "eval_precision": 0.9045548676950196, + "eval_recall": 0.8996674057649667, + "eval_runtime": 7.7532, + "eval_samples_per_second": 465.357, + "eval_steps_per_second": 3.74, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00010149783986854627, + "loss": 0.7386, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 9.803071869436301e-05, + "loss": 0.7272, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8896895787139689, + "eval_f1": 0.8947834249737165, + "eval_loss": 0.8466248512268066, + "eval_precision": 0.9057202205282897, + "eval_recall": 0.8896895787139689, + "eval_runtime": 8.0517, + "eval_samples_per_second": 448.107, + "eval_steps_per_second": 3.602, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 9.434346327253169e-05, + "loss": 0.7197, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 9.045755048332105e-05, + "loss": 0.7181, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9057777973351369, + "eval_loss": 0.8138097524642944, + "eval_precision": 0.9041378052234703, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.1884, + "eval_samples_per_second": 440.626, + "eval_steps_per_second": 3.542, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 8.639561431167677e-05, + "loss": 0.7174, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.89820239293116, + "eval_loss": 0.8274371027946472, + "eval_precision": 0.9039486731339669, + "eval_recall": 0.9093680709534369, + "eval_runtime": 7.9593, + "eval_samples_per_second": 453.308, + "eval_steps_per_second": 3.644, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 8.218131401274603e-05, + "loss": 0.7172, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 7.783919630558733e-05, + "loss": 0.7161, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9070261196321503, + "eval_loss": 0.8128407001495361, + "eval_precision": 0.9074984304534995, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.8422, + "eval_samples_per_second": 460.074, + "eval_steps_per_second": 3.698, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 7.339455239773604e-05, + "loss": 0.7118, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 6.887327067340367e-05, + "loss": 0.7168, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8871951219512195, + "eval_f1": 0.8928512395983489, + "eval_loss": 0.8452953696250916, + "eval_precision": 0.9067276774452198, + "eval_recall": 0.8871951219512195, + "eval_runtime": 7.8052, + "eval_samples_per_second": 462.258, + "eval_steps_per_second": 3.715, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 6.430168590334673e-05, + "loss": 0.7081, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9159569993312129, + "eval_loss": 0.8037598133087158, + "eval_precision": 0.9129131434315576, + "eval_recall": 0.9193458980044346, + "eval_runtime": 7.9591, + "eval_samples_per_second": 453.316, + "eval_steps_per_second": 3.644, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 5.9706425854701675e-05, + "loss": 0.7109, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 5.5114256194225456e-05, + "loss": 0.7067, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9188528025052493, + "eval_loss": 0.8002427816390991, + "eval_precision": 0.9167553053532609, + "eval_recall": 0.9223946784922394, + "eval_runtime": 8.1574, + "eval_samples_per_second": 442.3, + "eval_steps_per_second": 3.555, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 5.055192458832272e-05, + "loss": 0.7072, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 4.6046004907918214e-05, + "loss": 0.7067, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9090057250327799, + "eval_loss": 0.8111190795898438, + "eval_precision": 0.9060200834249634, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.1357, + "eval_samples_per_second": 443.477, + "eval_steps_per_second": 3.565, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 4.1622742445622745e-05, + "loss": 0.7013, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.9008775748354102, + "eval_loss": 0.8301903009414673, + "eval_precision": 0.9049821334466774, + "eval_recall": 0.8991130820399114, + "eval_runtime": 7.8813, + "eval_samples_per_second": 457.793, + "eval_steps_per_second": 3.68, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 3.730790104674453e-05, + "loss": 0.7015, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.31266130445507e-05, + "loss": 0.7049, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9159134487453285, + "eval_loss": 0.8049315214157104, + "eval_precision": 0.9153194971770372, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.1922, + "eval_samples_per_second": 440.422, + "eval_steps_per_second": 3.54, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 2.910323287384946e-05, + "loss": 0.6997, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9066827990198221, + "eval_loss": 0.8146717548370361, + "eval_precision": 0.9036194261537234, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.0835, + "eval_samples_per_second": 446.343, + "eval_steps_per_second": 3.588, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.526119521553932e-05, + "loss": 0.7002, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.1622878498379803e-05, + "loss": 0.6985, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9134981687648804, + "eval_loss": 0.8031349778175354, + "eval_precision": 0.9131968869352705, + "eval_recall": 0.9193458980044346, + "eval_runtime": 7.9769, + "eval_samples_per_second": 452.307, + "eval_steps_per_second": 3.636, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.820947455303485e-05, + "loss": 0.6966, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.5040865177605319e-05, + "loss": 0.6962, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9101291540677008, + "eval_loss": 0.8082433938980103, + "eval_precision": 0.9082006851277088, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.9324, + "eval_samples_per_second": 454.845, + "eval_steps_per_second": 3.656, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.2135506333609836e-05, + "loss": 0.6981, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9098636523738935, + "eval_loss": 0.8069301843643188, + "eval_precision": 0.9069489878744896, + "eval_recall": 0.914079822616408, + "eval_runtime": 8.1002, + "eval_samples_per_second": 445.419, + "eval_steps_per_second": 3.58, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 9.510320646929061e-06, + "loss": 0.6923, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 7.180598839855349e-06, + "loss": 0.6954, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9164996853121001, + "eval_loss": 0.802531898021698, + "eval_precision": 0.9141107745394401, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.4251, + "eval_samples_per_second": 485.922, + "eval_steps_per_second": 3.906, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 5.159910668368672e-06, + "loss": 0.6954, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.460025883396096e-06, + "loss": 0.6954, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9093958262839731, + "eval_loss": 0.8087734580039978, + "eval_precision": 0.9065817542315522, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.4656, + "eval_samples_per_second": 483.284, + "eval_steps_per_second": 3.884, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.090845676425853e-06, + "loss": 0.6937, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9128764959094755, + "eval_loss": 0.8041980266571045, + "eval_precision": 0.9100163920644428, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.2106, + "eval_samples_per_second": 439.432, + "eval_steps_per_second": 3.532, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.0603450087794093e-06, + "loss": 0.6953, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 3.745261604520312e-07, + "loss": 0.6955, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9235033259423503, + "eval_f1": 0.9201760596208628, + "eval_loss": 0.798057496547699, + "eval_precision": 0.9179842697428078, + "eval_recall": 0.9235033259423503, + "eval_runtime": 7.8257, + "eval_samples_per_second": 461.043, + "eval_steps_per_second": 3.706, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4479163416152068, + "learning_rate": 0.0001204739974985968, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-pacfm46i/checkpoint-616/training_args.bin b/run-pacfm46i/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0d94495f0dcf21d3c3c6435f3bbfc209060e92e5 --- /dev/null +++ b/run-pacfm46i/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2be4d701e6101dd74e2f00f849b11c3a7b0434aed6c0204191f48e0f5d67464d +size 4792 diff --git a/run-pacfm46i/checkpoint-630/model.safetensors b/run-pacfm46i/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..477b59445b0764998b8727ca7b584e1576029372 --- /dev/null +++ b/run-pacfm46i/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0909dd594b64fc76939a4d7766eff14c13d3037c8de57bc3dd326835c93668e +size 198025308 diff --git a/run-pacfm46i/checkpoint-630/optimizer.pt b/run-pacfm46i/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..721668e9d6ca88903162c20ac58ba7df9d5638f7 --- /dev/null +++ b/run-pacfm46i/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9160fb780a86fe968551945ab1fe31ea1006480ab385af0a85c26880052ba4f +size 395900602 diff --git a/run-pacfm46i/checkpoint-630/rng_state.pth b/run-pacfm46i/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-pacfm46i/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-pacfm46i/checkpoint-630/scheduler.pt b/run-pacfm46i/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ab25956f7c74efbeb20ed2440525d9400c46a53 --- /dev/null +++ b/run-pacfm46i/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8e5c896c927501c97d609d5fffa31661bbad638d0f960b2b1462b805437eb5e +size 1064 diff --git a/run-pacfm46i/checkpoint-630/trainer_state.json b/run-pacfm46i/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d68333acb6b3535336e9306635923609530470ab --- /dev/null +++ b/run-pacfm46i/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9201760596208628, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-pacfm46i/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.6485915447176403e-05, + "loss": 1.4855, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.0238878726959229, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.2236, + "eval_samples_per_second": 438.739, + "eval_steps_per_second": 3.526, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.2971830894352806e-05, + "loss": 1.1173, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 4.945774634152921e-05, + "loss": 0.9379, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8725055432372506, + "eval_f1": 0.8387750017267637, + "eval_loss": 0.9738784432411194, + "eval_precision": 0.8665422112681861, + "eval_recall": 0.8725055432372506, + "eval_runtime": 7.5772, + "eval_samples_per_second": 476.167, + "eval_steps_per_second": 3.827, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 6.594366178870561e-05, + "loss": 0.8645, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.903345619858151, + "eval_loss": 0.8674681782722473, + "eval_precision": 0.9000064831253841, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.1567, + "eval_samples_per_second": 442.338, + "eval_steps_per_second": 3.555, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 8.242957723588202e-05, + "loss": 0.8267, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 9.891549268305842e-05, + "loss": 0.8022, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9046336795025958, + "eval_loss": 0.8086495995521545, + "eval_precision": 0.9003895532409713, + "eval_recall": 0.9099223946784922, + "eval_runtime": 7.856, + "eval_samples_per_second": 459.268, + "eval_steps_per_second": 3.691, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00011540140813023482, + "loss": 0.7991, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00012038989489432099, + "loss": 0.7766, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9101995565410199, + "eval_f1": 0.907109207244021, + "eval_loss": 0.8067899942398071, + "eval_precision": 0.9071371900569296, + "eval_recall": 0.9101995565410199, + "eval_runtime": 8.1274, + "eval_samples_per_second": 443.932, + "eval_steps_per_second": 3.568, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011997204018815586, + "loss": 0.7786, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9047677672818952, + "eval_loss": 0.8040274977684021, + "eval_precision": 0.9018778999556504, + "eval_recall": 0.9121396895787139, + "eval_runtime": 8.1769, + "eval_samples_per_second": 441.245, + "eval_steps_per_second": 3.547, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00011920625126370037, + "loss": 0.7701, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00011809698855430137, + "loss": 0.7639, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9121614405671676, + "eval_loss": 0.7985339164733887, + "eval_precision": 0.9082048884863358, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.7832, + "eval_samples_per_second": 463.563, + "eval_steps_per_second": 3.726, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00011665071309931488, + "loss": 0.7592, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00011487584891097466, + "loss": 0.7485, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9070702229403478, + "eval_loss": 0.8066944479942322, + "eval_precision": 0.903686494252266, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.9493, + "eval_samples_per_second": 453.877, + "eval_steps_per_second": 3.648, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00011278273390767945, + "loss": 0.7451, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9120216108815606, + "eval_loss": 0.8016820549964905, + "eval_precision": 0.9120991969183748, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.7418, + "eval_samples_per_second": 466.043, + "eval_steps_per_second": 3.746, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0001103835596994947, + "loss": 0.748, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0001076923005765951, + "loss": 0.7395, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9115439470866833, + "eval_loss": 0.8016670942306519, + "eval_precision": 0.9106920644769579, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.3643, + "eval_samples_per_second": 489.931, + "eval_steps_per_second": 3.938, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00010472463211426429, + "loss": 0.732, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8996674057649667, + "eval_f1": 0.9010905799060916, + "eval_loss": 0.8230919241905212, + "eval_precision": 0.9045548676950196, + "eval_recall": 0.8996674057649667, + "eval_runtime": 7.7532, + "eval_samples_per_second": 465.357, + "eval_steps_per_second": 3.74, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00010149783986854627, + "loss": 0.7386, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 9.803071869436301e-05, + "loss": 0.7272, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8896895787139689, + "eval_f1": 0.8947834249737165, + "eval_loss": 0.8466248512268066, + "eval_precision": 0.9057202205282897, + "eval_recall": 0.8896895787139689, + "eval_runtime": 8.0517, + "eval_samples_per_second": 448.107, + "eval_steps_per_second": 3.602, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 9.434346327253169e-05, + "loss": 0.7197, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 9.045755048332105e-05, + "loss": 0.7181, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9057777973351369, + "eval_loss": 0.8138097524642944, + "eval_precision": 0.9041378052234703, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.1884, + "eval_samples_per_second": 440.626, + "eval_steps_per_second": 3.542, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 8.639561431167677e-05, + "loss": 0.7174, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.89820239293116, + "eval_loss": 0.8274371027946472, + "eval_precision": 0.9039486731339669, + "eval_recall": 0.9093680709534369, + "eval_runtime": 7.9593, + "eval_samples_per_second": 453.308, + "eval_steps_per_second": 3.644, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 8.218131401274603e-05, + "loss": 0.7172, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 7.783919630558733e-05, + "loss": 0.7161, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9070261196321503, + "eval_loss": 0.8128407001495361, + "eval_precision": 0.9074984304534995, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.8422, + "eval_samples_per_second": 460.074, + "eval_steps_per_second": 3.698, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 7.339455239773604e-05, + "loss": 0.7118, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 6.887327067340367e-05, + "loss": 0.7168, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8871951219512195, + "eval_f1": 0.8928512395983489, + "eval_loss": 0.8452953696250916, + "eval_precision": 0.9067276774452198, + "eval_recall": 0.8871951219512195, + "eval_runtime": 7.8052, + "eval_samples_per_second": 462.258, + "eval_steps_per_second": 3.715, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 6.430168590334673e-05, + "loss": 0.7081, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9159569993312129, + "eval_loss": 0.8037598133087158, + "eval_precision": 0.9129131434315576, + "eval_recall": 0.9193458980044346, + "eval_runtime": 7.9591, + "eval_samples_per_second": 453.316, + "eval_steps_per_second": 3.644, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 5.9706425854701675e-05, + "loss": 0.7109, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 5.5114256194225456e-05, + "loss": 0.7067, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9188528025052493, + "eval_loss": 0.8002427816390991, + "eval_precision": 0.9167553053532609, + "eval_recall": 0.9223946784922394, + "eval_runtime": 8.1574, + "eval_samples_per_second": 442.3, + "eval_steps_per_second": 3.555, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 5.055192458832272e-05, + "loss": 0.7072, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 4.6046004907918214e-05, + "loss": 0.7067, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9090057250327799, + "eval_loss": 0.8111190795898438, + "eval_precision": 0.9060200834249634, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.1357, + "eval_samples_per_second": 443.477, + "eval_steps_per_second": 3.565, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 4.1622742445622745e-05, + "loss": 0.7013, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.9008775748354102, + "eval_loss": 0.8301903009414673, + "eval_precision": 0.9049821334466774, + "eval_recall": 0.8991130820399114, + "eval_runtime": 7.8813, + "eval_samples_per_second": 457.793, + "eval_steps_per_second": 3.68, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 3.730790104674453e-05, + "loss": 0.7015, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.31266130445507e-05, + "loss": 0.7049, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9159134487453285, + "eval_loss": 0.8049315214157104, + "eval_precision": 0.9153194971770372, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.1922, + "eval_samples_per_second": 440.422, + "eval_steps_per_second": 3.54, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 2.910323287384946e-05, + "loss": 0.6997, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9066827990198221, + "eval_loss": 0.8146717548370361, + "eval_precision": 0.9036194261537234, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.0835, + "eval_samples_per_second": 446.343, + "eval_steps_per_second": 3.588, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.526119521553932e-05, + "loss": 0.7002, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.1622878498379803e-05, + "loss": 0.6985, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9134981687648804, + "eval_loss": 0.8031349778175354, + "eval_precision": 0.9131968869352705, + "eval_recall": 0.9193458980044346, + "eval_runtime": 7.9769, + "eval_samples_per_second": 452.307, + "eval_steps_per_second": 3.636, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.820947455303485e-05, + "loss": 0.6966, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.5040865177605319e-05, + "loss": 0.6962, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9101291540677008, + "eval_loss": 0.8082433938980103, + "eval_precision": 0.9082006851277088, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.9324, + "eval_samples_per_second": 454.845, + "eval_steps_per_second": 3.656, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.2135506333609836e-05, + "loss": 0.6981, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9098636523738935, + "eval_loss": 0.8069301843643188, + "eval_precision": 0.9069489878744896, + "eval_recall": 0.914079822616408, + "eval_runtime": 8.1002, + "eval_samples_per_second": 445.419, + "eval_steps_per_second": 3.58, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 9.510320646929061e-06, + "loss": 0.6923, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 7.180598839855349e-06, + "loss": 0.6954, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9164996853121001, + "eval_loss": 0.802531898021698, + "eval_precision": 0.9141107745394401, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.4251, + "eval_samples_per_second": 485.922, + "eval_steps_per_second": 3.906, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 5.159910668368672e-06, + "loss": 0.6954, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.460025883396096e-06, + "loss": 0.6954, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9093958262839731, + "eval_loss": 0.8087734580039978, + "eval_precision": 0.9065817542315522, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.4656, + "eval_samples_per_second": 483.284, + "eval_steps_per_second": 3.884, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.090845676425853e-06, + "loss": 0.6937, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9128764959094755, + "eval_loss": 0.8041980266571045, + "eval_precision": 0.9100163920644428, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.2106, + "eval_samples_per_second": 439.432, + "eval_steps_per_second": 3.532, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.0603450087794093e-06, + "loss": 0.6953, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 3.745261604520312e-07, + "loss": 0.6955, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9235033259423503, + "eval_f1": 0.9201760596208628, + "eval_loss": 0.798057496547699, + "eval_precision": 0.9179842697428078, + "eval_recall": 0.9235033259423503, + "eval_runtime": 7.8257, + "eval_samples_per_second": 461.043, + "eval_steps_per_second": 3.706, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 3.7383769082437477e-08, + "loss": 0.6914, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9141568096207465, + "eval_loss": 0.8044596910476685, + "eval_precision": 0.9125677871912332, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.9775, + "eval_samples_per_second": 452.271, + "eval_steps_per_second": 3.635, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4479163416152068, + "learning_rate": 0.0001204739974985968, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-pacfm46i/checkpoint-630/training_args.bin b/run-pacfm46i/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0d94495f0dcf21d3c3c6435f3bbfc209060e92e5 --- /dev/null +++ b/run-pacfm46i/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2be4d701e6101dd74e2f00f849b11c3a7b0434aed6c0204191f48e0f5d67464d +size 4792 diff --git a/run-pg2pljei/checkpoint-1147/model.safetensors b/run-pg2pljei/checkpoint-1147/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..805de6ec154e8530bbc8c9ff73c2f70ba0a948c5 --- /dev/null +++ b/run-pg2pljei/checkpoint-1147/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c463f4cd84a9a38c87133a3f9f79029669373a95f8d824b9078a85b1fa3b382a +size 198025308 diff --git a/run-pg2pljei/checkpoint-1147/optimizer.pt b/run-pg2pljei/checkpoint-1147/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..711846f25fdd585abe22d5716883b97bdb9910db --- /dev/null +++ b/run-pg2pljei/checkpoint-1147/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ada14263ddf1d0580ff11f487bd156992e94d5dd19a8ffcc93b01cbb3dc77b98 +size 395900602 diff --git a/run-pg2pljei/checkpoint-1147/rng_state.pth b/run-pg2pljei/checkpoint-1147/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..43b1a3175dffb3289ba56a1e7f78b36ca1615834 --- /dev/null +++ b/run-pg2pljei/checkpoint-1147/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf2d43d63184b1920f250efdd6f38efa027691f238090c0a0b0f43317419a2de +size 14244 diff --git a/run-pg2pljei/checkpoint-1147/scheduler.pt b/run-pg2pljei/checkpoint-1147/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..79248e1165bf3e5df2290c4dd537eb3ebacffbbb --- /dev/null +++ b/run-pg2pljei/checkpoint-1147/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecc05038674f5c3881f043f9aedf91ffa51df8f5b3de821ea149835a3b5e3390 +size 1064 diff --git a/run-pg2pljei/checkpoint-1147/trainer_state.json b/run-pg2pljei/checkpoint-1147/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e0b30d0fdd620c0539e8f166d3b23872b2e5a70f --- /dev/null +++ b/run-pg2pljei/checkpoint-1147/trainer_state.json @@ -0,0 +1,534 @@ +{ + "best_metric": 0.9185144124168514, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-pg2pljei/checkpoint-1147", + "epoch": 26.988235294117647, + "eval_steps": 500, + "global_step": 1147, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 8.035276739219069e-05, + "loss": 1.1954, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8927383592017738, + "eval_loss": 0.9694392681121826, + "eval_runtime": 6.7187, + "eval_samples_per_second": 537.01, + "eval_steps_per_second": 8.484, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00016070553478438138, + "loss": 0.856, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002410583021765721, + "loss": 0.8108, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.897450110864745, + "eval_loss": 0.8358007073402405, + "eval_runtime": 6.8533, + "eval_samples_per_second": 526.463, + "eval_steps_per_second": 8.317, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00032141106956876277, + "loss": 0.8026, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8957871396895787, + "eval_loss": 0.8440775275230408, + "eval_runtime": 6.7577, + "eval_samples_per_second": 533.906, + "eval_steps_per_second": 8.435, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004017638369609535, + "loss": 0.8089, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004821166043531442, + "loss": 0.7961, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.4844789356984479, + "eval_loss": 1.4451898336410522, + "eval_runtime": 6.9711, + "eval_samples_per_second": 517.563, + "eval_steps_per_second": 8.177, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0005624693717453348, + "loss": 0.8036, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0006211217361486711, + "loss": 0.8146, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8747228381374723, + "eval_loss": 0.8752522468566895, + "eval_runtime": 7.2634, + "eval_samples_per_second": 496.734, + "eval_steps_per_second": 7.848, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0006197015599435306, + "loss": 0.8164, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8594789356984479, + "eval_loss": 0.9122125506401062, + "eval_runtime": 6.7718, + "eval_samples_per_second": 532.799, + "eval_steps_per_second": 8.417, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0006164433717590094, + "loss": 0.8086, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0006113665454199119, + "loss": 0.7961, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.7774390243902439, + "eval_loss": 0.9890197515487671, + "eval_runtime": 6.837, + "eval_samples_per_second": 527.714, + "eval_steps_per_second": 8.337, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0006045012687295471, + "loss": 0.8009, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0005958883639671395, + "loss": 0.7869, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8808203991130821, + "eval_loss": 0.8744761347770691, + "eval_runtime": 6.9095, + "eval_samples_per_second": 522.183, + "eval_steps_per_second": 8.25, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0005855790451505651, + "loss": 0.7852, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8799889135254989, + "eval_loss": 0.8779320120811462, + "eval_runtime": 6.7899, + "eval_samples_per_second": 531.381, + "eval_steps_per_second": 8.395, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0005736346135077756, + "loss": 0.7963, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0005601260929676946, + "loss": 0.7907, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8694567627494457, + "eval_loss": 0.8739984631538391, + "eval_runtime": 7.0185, + "eval_samples_per_second": 514.068, + "eval_steps_per_second": 8.121, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0005451338078380334, + "loss": 0.7751, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.88470066518847, + "eval_loss": 0.85569167137146, + "eval_runtime": 6.9684, + "eval_samples_per_second": 517.768, + "eval_steps_per_second": 8.18, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0005287469051812307, + "loss": 0.7714, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0005110628247285683, + "loss": 0.7668, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8883037694013304, + "eval_loss": 0.8574525713920593, + "eval_runtime": 6.8333, + "eval_samples_per_second": 528.006, + "eval_steps_per_second": 8.342, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0004921867194844513, + "loss": 0.7602, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0004722308304660598, + "loss": 0.7611, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8736141906873615, + "eval_loss": 0.8688214421272278, + "eval_runtime": 6.7769, + "eval_samples_per_second": 532.399, + "eval_steps_per_second": 8.411, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00045131381929628604, + "loss": 0.7578, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8952328159645233, + "eval_loss": 0.8484098315238953, + "eval_runtime": 6.9591, + "eval_samples_per_second": 518.458, + "eval_steps_per_second": 8.191, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00042956006261849427, + "loss": 0.7608, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00040709891252865024, + "loss": 0.742, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8927383592017738, + "eval_loss": 0.8440089821815491, + "eval_runtime": 6.7657, + "eval_samples_per_second": 533.279, + "eval_steps_per_second": 8.425, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.000384063927422435, + "loss": 0.7556, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0003605920778308776, + "loss": 0.7429, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8968957871396895, + "eval_loss": 0.8473629951477051, + "eval_runtime": 6.7259, + "eval_samples_per_second": 536.436, + "eval_steps_per_second": 8.475, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.000336822931966762, + "loss": 0.7382, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8268345594406128, + "eval_runtime": 6.6873, + "eval_samples_per_second": 539.534, + "eval_steps_per_second": 8.524, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0003128978258247111, + "loss": 0.7309, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00028895902276969285, + "loss": 0.7284, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9004988913525499, + "eval_loss": 0.8397396206855774, + "eval_runtime": 6.9756, + "eval_samples_per_second": 517.231, + "eval_steps_per_second": 8.171, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0002651488676112012, + "loss": 0.7225, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00024160894019315186, + "loss": 0.7221, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9010532150776053, + "eval_loss": 0.8316428661346436, + "eval_runtime": 6.744, + "eval_samples_per_second": 534.991, + "eval_steps_per_second": 8.452, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0002184792135324129, + "loss": 0.7264, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.899390243902439, + "eval_loss": 0.8261064291000366, + "eval_runtime": 6.9855, + "eval_samples_per_second": 516.497, + "eval_steps_per_second": 8.16, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00019589722151184424, + "loss": 0.7178, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00017399724107690408, + "loss": 0.7128, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.8171206116676331, + "eval_runtime": 6.5471, + "eval_samples_per_second": 551.082, + "eval_steps_per_second": 8.706, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00015290949379864475, + "loss": 0.7051, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8251943588256836, + "eval_runtime": 6.8382, + "eval_samples_per_second": 527.622, + "eval_steps_per_second": 8.335, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.000132759371550757, + "loss": 0.7075, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00011366669090494078, + "loss": 0.7074, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8129871487617493, + "eval_runtime": 6.693, + "eval_samples_per_second": 539.07, + "eval_steps_per_second": 8.516, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 9.574498067811354e-05, + "loss": 0.7071, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 7.910080686783936e-05, + "loss": 0.7014, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8132690191268921, + "eval_runtime": 6.8111, + "eval_samples_per_second": 529.727, + "eval_steps_per_second": 8.369, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 6.383313899004426e-05, + "loss": 0.6983, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8120121955871582, + "eval_runtime": 6.9527, + "eval_samples_per_second": 518.935, + "eval_steps_per_second": 8.198, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 5.003276158689979e-05, + "loss": 0.6988, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 3.778173440416176e-05, + "loss": 0.6932, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8157423734664917, + "eval_runtime": 6.7938, + "eval_samples_per_second": 531.071, + "eval_steps_per_second": 8.39, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 2.7152904447857337e-05, + "loss": 0.6939, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.8209472821725862e-05, + "loss": 0.6984, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8108386397361755, + "eval_runtime": 6.4684, + "eval_samples_per_second": 557.785, + "eval_steps_per_second": 8.812, + "step": 1147 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.000621188701762705, + "metric": "eval/loss", + "warmup_ratio": 0.158818113482139 + } +} diff --git a/run-pg2pljei/checkpoint-1147/training_args.bin b/run-pg2pljei/checkpoint-1147/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..65d841ae42dcb1234113d39e75ed9fd22cf20b41 --- /dev/null +++ b/run-pg2pljei/checkpoint-1147/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e9235602af7e7acb6f6cddd63e2b541f5d5754289431bb8d8a7d4c31806dbc2 +size 4792 diff --git a/run-pg2pljei/checkpoint-1260/model.safetensors b/run-pg2pljei/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f56463a783650a49756b39888e1506c433eb2d15 --- /dev/null +++ b/run-pg2pljei/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f18caff5deacf85925c0e66fd27da3bc7faacc1851d693dbc24761f9ae54efa +size 198025308 diff --git a/run-pg2pljei/checkpoint-1260/optimizer.pt b/run-pg2pljei/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7aa8552c2245a807ac4e5f314b420580efbaecc5 --- /dev/null +++ b/run-pg2pljei/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05c86fbc32a02edbdf72b453399c1d5192b9a65b5016333594ef62c9f43aa8d7 +size 395900602 diff --git a/run-pg2pljei/checkpoint-1260/rng_state.pth b/run-pg2pljei/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-pg2pljei/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-pg2pljei/checkpoint-1260/scheduler.pt b/run-pg2pljei/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..205876430b4287b951b6c9b20261c370fbb245ef --- /dev/null +++ b/run-pg2pljei/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2695c910452cf7612289309d4cb26bc9f8b7fe6b69988badb395299b39a39360 +size 1064 diff --git a/run-pg2pljei/checkpoint-1260/trainer_state.json b/run-pg2pljei/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ca26a8cb82699d3c99438c1e9bea1337ea7c28a0 --- /dev/null +++ b/run-pg2pljei/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9185144124168514, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-pg2pljei/checkpoint-1147", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 8.035276739219069e-05, + "loss": 1.1954, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8927383592017738, + "eval_loss": 0.9694392681121826, + "eval_runtime": 6.7187, + "eval_samples_per_second": 537.01, + "eval_steps_per_second": 8.484, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00016070553478438138, + "loss": 0.856, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002410583021765721, + "loss": 0.8108, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.897450110864745, + "eval_loss": 0.8358007073402405, + "eval_runtime": 6.8533, + "eval_samples_per_second": 526.463, + "eval_steps_per_second": 8.317, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00032141106956876277, + "loss": 0.8026, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8957871396895787, + "eval_loss": 0.8440775275230408, + "eval_runtime": 6.7577, + "eval_samples_per_second": 533.906, + "eval_steps_per_second": 8.435, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004017638369609535, + "loss": 0.8089, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004821166043531442, + "loss": 0.7961, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.4844789356984479, + "eval_loss": 1.4451898336410522, + "eval_runtime": 6.9711, + "eval_samples_per_second": 517.563, + "eval_steps_per_second": 8.177, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0005624693717453348, + "loss": 0.8036, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0006211217361486711, + "loss": 0.8146, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8747228381374723, + "eval_loss": 0.8752522468566895, + "eval_runtime": 7.2634, + "eval_samples_per_second": 496.734, + "eval_steps_per_second": 7.848, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0006197015599435306, + "loss": 0.8164, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8594789356984479, + "eval_loss": 0.9122125506401062, + "eval_runtime": 6.7718, + "eval_samples_per_second": 532.799, + "eval_steps_per_second": 8.417, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0006164433717590094, + "loss": 0.8086, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0006113665454199119, + "loss": 0.7961, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.7774390243902439, + "eval_loss": 0.9890197515487671, + "eval_runtime": 6.837, + "eval_samples_per_second": 527.714, + "eval_steps_per_second": 8.337, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0006045012687295471, + "loss": 0.8009, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0005958883639671395, + "loss": 0.7869, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8808203991130821, + "eval_loss": 0.8744761347770691, + "eval_runtime": 6.9095, + "eval_samples_per_second": 522.183, + "eval_steps_per_second": 8.25, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0005855790451505651, + "loss": 0.7852, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8799889135254989, + "eval_loss": 0.8779320120811462, + "eval_runtime": 6.7899, + "eval_samples_per_second": 531.381, + "eval_steps_per_second": 8.395, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0005736346135077756, + "loss": 0.7963, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0005601260929676946, + "loss": 0.7907, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8694567627494457, + "eval_loss": 0.8739984631538391, + "eval_runtime": 7.0185, + "eval_samples_per_second": 514.068, + "eval_steps_per_second": 8.121, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0005451338078380334, + "loss": 0.7751, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.88470066518847, + "eval_loss": 0.85569167137146, + "eval_runtime": 6.9684, + "eval_samples_per_second": 517.768, + "eval_steps_per_second": 8.18, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0005287469051812307, + "loss": 0.7714, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0005110628247285683, + "loss": 0.7668, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8883037694013304, + "eval_loss": 0.8574525713920593, + "eval_runtime": 6.8333, + "eval_samples_per_second": 528.006, + "eval_steps_per_second": 8.342, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0004921867194844513, + "loss": 0.7602, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0004722308304660598, + "loss": 0.7611, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8736141906873615, + "eval_loss": 0.8688214421272278, + "eval_runtime": 6.7769, + "eval_samples_per_second": 532.399, + "eval_steps_per_second": 8.411, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00045131381929628604, + "loss": 0.7578, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8952328159645233, + "eval_loss": 0.8484098315238953, + "eval_runtime": 6.9591, + "eval_samples_per_second": 518.458, + "eval_steps_per_second": 8.191, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00042956006261849427, + "loss": 0.7608, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00040709891252865024, + "loss": 0.742, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8927383592017738, + "eval_loss": 0.8440089821815491, + "eval_runtime": 6.7657, + "eval_samples_per_second": 533.279, + "eval_steps_per_second": 8.425, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.000384063927422435, + "loss": 0.7556, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0003605920778308776, + "loss": 0.7429, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8968957871396895, + "eval_loss": 0.8473629951477051, + "eval_runtime": 6.7259, + "eval_samples_per_second": 536.436, + "eval_steps_per_second": 8.475, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.000336822931966762, + "loss": 0.7382, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8268345594406128, + "eval_runtime": 6.6873, + "eval_samples_per_second": 539.534, + "eval_steps_per_second": 8.524, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0003128978258247111, + "loss": 0.7309, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00028895902276969285, + "loss": 0.7284, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9004988913525499, + "eval_loss": 0.8397396206855774, + "eval_runtime": 6.9756, + "eval_samples_per_second": 517.231, + "eval_steps_per_second": 8.171, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0002651488676112012, + "loss": 0.7225, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00024160894019315186, + "loss": 0.7221, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9010532150776053, + "eval_loss": 0.8316428661346436, + "eval_runtime": 6.744, + "eval_samples_per_second": 534.991, + "eval_steps_per_second": 8.452, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0002184792135324129, + "loss": 0.7264, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.899390243902439, + "eval_loss": 0.8261064291000366, + "eval_runtime": 6.9855, + "eval_samples_per_second": 516.497, + "eval_steps_per_second": 8.16, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00019589722151184424, + "loss": 0.7178, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00017399724107690408, + "loss": 0.7128, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.8171206116676331, + "eval_runtime": 6.5471, + "eval_samples_per_second": 551.082, + "eval_steps_per_second": 8.706, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00015290949379864475, + "loss": 0.7051, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8251943588256836, + "eval_runtime": 6.8382, + "eval_samples_per_second": 527.622, + "eval_steps_per_second": 8.335, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.000132759371550757, + "loss": 0.7075, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00011366669090494078, + "loss": 0.7074, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8129871487617493, + "eval_runtime": 6.693, + "eval_samples_per_second": 539.07, + "eval_steps_per_second": 8.516, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 9.574498067811354e-05, + "loss": 0.7071, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 7.910080686783936e-05, + "loss": 0.7014, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8132690191268921, + "eval_runtime": 6.8111, + "eval_samples_per_second": 529.727, + "eval_steps_per_second": 8.369, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 6.383313899004426e-05, + "loss": 0.6983, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8120121955871582, + "eval_runtime": 6.9527, + "eval_samples_per_second": 518.935, + "eval_steps_per_second": 8.198, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 5.003276158689979e-05, + "loss": 0.6988, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 3.778173440416176e-05, + "loss": 0.6932, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8157423734664917, + "eval_runtime": 6.7938, + "eval_samples_per_second": 531.071, + "eval_steps_per_second": 8.39, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 2.7152904447857337e-05, + "loss": 0.6939, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.8209472821725862e-05, + "loss": 0.6984, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8108386397361755, + "eval_runtime": 6.4684, + "eval_samples_per_second": 557.785, + "eval_steps_per_second": 8.812, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.1004618921082745e-05, + "loss": 0.6944, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8109570145606995, + "eval_runtime": 6.8208, + "eval_samples_per_second": 528.972, + "eval_steps_per_second": 8.357, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 5.581184217724413e-06, + "loss": 0.6912, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.971417516149414e-06, + "loss": 0.691, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8153846263885498, + "eval_runtime": 6.9132, + "eval_samples_per_second": 521.901, + "eval_steps_per_second": 8.245, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 1.967831958537875e-07, + "loss": 0.6942, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8084887266159058, + "eval_runtime": 6.827, + "eval_samples_per_second": 528.487, + "eval_steps_per_second": 8.349, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.000621188701762705, + "metric": "eval/loss", + "warmup_ratio": 0.158818113482139 + } +} diff --git a/run-pg2pljei/checkpoint-1260/training_args.bin b/run-pg2pljei/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..65d841ae42dcb1234113d39e75ed9fd22cf20b41 --- /dev/null +++ b/run-pg2pljei/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e9235602af7e7acb6f6cddd63e2b541f5d5754289431bb8d8a7d4c31806dbc2 +size 4792 diff --git a/run-pngad7dn/checkpoint-573/model.safetensors b/run-pngad7dn/checkpoint-573/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5da0ddac18b5ecc185d0ebca4c8f6649901ab50 --- /dev/null +++ b/run-pngad7dn/checkpoint-573/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c358a758ec68438f76afaea7fbb8a4c696a320df1ce9ea0f86aa4730b6c9940 +size 198025308 diff --git a/run-pngad7dn/checkpoint-573/optimizer.pt b/run-pngad7dn/checkpoint-573/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a33a838136bfb312912e67a04237bb189a24416d --- /dev/null +++ b/run-pngad7dn/checkpoint-573/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3370e712f316fa4b842c8d48df629ea6baf527875ae113ec6679ab545649e500 +size 395900602 diff --git a/run-pngad7dn/checkpoint-573/rng_state.pth b/run-pngad7dn/checkpoint-573/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e8713f9caaf617efce0d3935618a93ad2f5f391 --- /dev/null +++ b/run-pngad7dn/checkpoint-573/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9102bb312b12c2313ea7585eb813beef5c548592778aaea4ab0516e14ecd38e5 +size 14244 diff --git a/run-pngad7dn/checkpoint-573/scheduler.pt b/run-pngad7dn/checkpoint-573/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1056e47399f5931f58ea0a9279e02c8d1f6a6792 --- /dev/null +++ b/run-pngad7dn/checkpoint-573/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40335c160e7b0c1d43aac703bbb0cab5974482b96890889f3e953f84b4ba5c38 +size 1064 diff --git a/run-pngad7dn/checkpoint-573/trainer_state.json b/run-pngad7dn/checkpoint-573/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..506f828768c9c5285e5a9f3d36a3de92a021d2f7 --- /dev/null +++ b/run-pngad7dn/checkpoint-573/trainer_state.json @@ -0,0 +1,616 @@ +{ + "best_metric": 0.9184928000354279, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-pngad7dn/checkpoint-573", + "epoch": 26.96470588235294, + "eval_steps": 500, + "global_step": 573, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.6606658910010463e-05, + "loss": 1.4682, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.7514814673341468, + "eval_loss": 1.0282491445541382, + "eval_precision": 0.7193932906845331, + "eval_recall": 0.8287139689578714, + "eval_runtime": 8.2294, + "eval_samples_per_second": 438.429, + "eval_steps_per_second": 3.524, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.321331782002093e-05, + "loss": 1.1177, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 4.9819976730031394e-05, + "loss": 0.9319, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8733370288248337, + "eval_f1": 0.8483226682443482, + "eval_loss": 0.9193330407142639, + "eval_precision": 0.8702380163739608, + "eval_recall": 0.8733370288248337, + "eval_runtime": 8.2451, + "eval_samples_per_second": 437.595, + "eval_steps_per_second": 3.517, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 6.642663564004185e-05, + "loss": 0.8585, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9046563192904656, + "eval_f1": 0.8955717583194334, + "eval_loss": 0.8422123193740845, + "eval_precision": 0.892010911011154, + "eval_recall": 0.9046563192904656, + "eval_runtime": 8.4705, + "eval_samples_per_second": 425.951, + "eval_steps_per_second": 3.424, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 8.303329455005233e-05, + "loss": 0.8284, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 9.963995346006279e-05, + "loss": 0.7991, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.9006246873810517, + "eval_loss": 0.8324504494667053, + "eval_precision": 0.9024657943075222, + "eval_recall": 0.9024390243902439, + "eval_runtime": 8.3236, + "eval_samples_per_second": 433.468, + "eval_steps_per_second": 3.484, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00011624661237007325, + "loss": 0.7917, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001328532712800837, + "loss": 0.7797, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.9051103857104487, + "eval_loss": 0.8143452405929565, + "eval_precision": 0.9021587956020322, + "eval_recall": 0.9107538802660754, + "eval_runtime": 8.2043, + "eval_samples_per_second": 439.77, + "eval_steps_per_second": 3.535, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0001494599301900942, + "loss": 0.7626, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9114953597438054, + "eval_loss": 0.8027127385139465, + "eval_precision": 0.9102513281911367, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.7819, + "eval_samples_per_second": 463.643, + "eval_steps_per_second": 3.727, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00016093183375431538, + "loss": 0.7651, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00016050542918475926, + "loss": 0.7567, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9085399470909878, + "eval_loss": 0.8093046545982361, + "eval_precision": 0.9068139442944223, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.4107, + "eval_samples_per_second": 428.977, + "eval_steps_per_second": 3.448, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00015955382513897534, + "loss": 0.751, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001580832667831415, + "loss": 0.7476, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8384146341463414, + "eval_f1": 0.853259968721092, + "eval_loss": 0.9248923659324646, + "eval_precision": 0.8896655784208132, + "eval_recall": 0.8384146341463414, + "eval_runtime": 8.2228, + "eval_samples_per_second": 438.779, + "eval_steps_per_second": 3.527, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00015610340506540916, + "loss": 0.7435, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.897901964596072, + "eval_loss": 0.8439856171607971, + "eval_precision": 0.9018191189066853, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.9178, + "eval_samples_per_second": 455.68, + "eval_steps_per_second": 3.663, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00015362723337887212, + "loss": 0.7367, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0001506710022888112, + "loss": 0.7327, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9070764744206813, + "eval_loss": 0.810111939907074, + "eval_precision": 0.9046915596734749, + "eval_recall": 0.9110310421286031, + "eval_runtime": 8.2565, + "eval_samples_per_second": 436.991, + "eval_steps_per_second": 3.512, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0001472541128838405, + "loss": 0.7286, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8392461197339246, + "eval_f1": 0.8537323114802033, + "eval_loss": 0.9132199883460999, + "eval_precision": 0.8875122550876335, + "eval_recall": 0.8392461197339246, + "eval_runtime": 7.9599, + "eval_samples_per_second": 453.274, + "eval_steps_per_second": 3.643, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0001433989894508682, + "loss": 0.7272, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00013913093230947778, + "loss": 0.7201, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9106973075982288, + "eval_loss": 0.8169375658035278, + "eval_precision": 0.9114418308009902, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.8155, + "eval_samples_per_second": 461.649, + "eval_steps_per_second": 3.711, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0001344779517715453, + "loss": 0.7221, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0001294705843157782, + "loss": 0.716, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8763858093126385, + "eval_f1": 0.8846646779862251, + "eval_loss": 0.8635155558586121, + "eval_precision": 0.9023575021310767, + "eval_recall": 0.8763858093126385, + "eval_runtime": 8.2589, + "eval_samples_per_second": 436.863, + "eval_steps_per_second": 3.511, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00012414169218358224, + "loss": 0.7162, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.9043411825346244, + "eval_loss": 0.8228191137313843, + "eval_precision": 0.9055641694671297, + "eval_recall": 0.9068736141906873, + "eval_runtime": 7.8824, + "eval_samples_per_second": 457.726, + "eval_steps_per_second": 3.679, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00011852624771146353, + "loss": 0.711, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00011266110381534513, + "loss": 0.7114, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9096452328159645, + "eval_f1": 0.9081870872669119, + "eval_loss": 0.8182196617126465, + "eval_precision": 0.9126225492200917, + "eval_recall": 0.9096452328159645, + "eval_runtime": 7.9496, + "eval_samples_per_second": 453.86, + "eval_steps_per_second": 3.648, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00010658475213305792, + "loss": 0.7125, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00010033707041226333, + "loss": 0.7103, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.8984492777263368, + "eval_loss": 0.8251257538795471, + "eval_precision": 0.9024246300350663, + "eval_recall": 0.9107538802660754, + "eval_runtime": 8.0824, + "eval_samples_per_second": 446.401, + "eval_steps_per_second": 3.588, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 9.3959060801645e-05, + "loss": 0.7053, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8949556541019955, + "eval_f1": 0.8977127954443364, + "eval_loss": 0.8413810729980469, + "eval_precision": 0.9060704199921686, + "eval_recall": 0.8949556541019955, + "eval_runtime": 7.8946, + "eval_samples_per_second": 457.023, + "eval_steps_per_second": 3.673, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 8.749258076290578e-05, + "loss": 0.704, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 8.098006836953543e-05, + "loss": 0.7016, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9131983798732631, + "eval_loss": 0.807257890701294, + "eval_precision": 0.9122666156944252, + "eval_recall": 0.9171286031042128, + "eval_runtime": 8.0964, + "eval_samples_per_second": 445.631, + "eval_steps_per_second": 3.582, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 7.4464263795152e-05, + "loss": 0.7033, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 6.798792881922688e-05, + "loss": 0.6967, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9085903518695634, + "eval_loss": 0.8192573189735413, + "eval_precision": 0.9109913917882523, + "eval_recall": 0.9088137472283814, + "eval_runtime": 8.2919, + "eval_samples_per_second": 435.123, + "eval_steps_per_second": 3.497, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 6.159356619101567e-05, + "loss": 0.7058, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9073584510258147, + "eval_loss": 0.8104814887046814, + "eval_precision": 0.90734316964549, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.3803, + "eval_samples_per_second": 430.536, + "eval_steps_per_second": 3.461, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 5.532314069344669e-05, + "loss": 0.6986, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 4.921780373756339e-05, + "loss": 0.6931, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.909449000689685, + "eval_loss": 0.8110982775688171, + "eval_precision": 0.9060612571936242, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.8376, + "eval_samples_per_second": 460.344, + "eval_steps_per_second": 3.7, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 4.3317623294945596e-05, + "loss": 0.6962, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9101494514204306, + "eval_loss": 0.8114901185035706, + "eval_precision": 0.9075502600733413, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.2168, + "eval_samples_per_second": 439.1, + "eval_steps_per_second": 3.529, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 3.7661320940503696e-05, + "loss": 0.6951, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 3.2286017731375486e-05, + "loss": 0.6908, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9094854057043816, + "eval_loss": 0.8118507862091064, + "eval_precision": 0.9079458773936686, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.4016, + "eval_samples_per_second": 429.444, + "eval_steps_per_second": 3.452, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.7226990589665792e-05, + "loss": 0.6945, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 2.2517440787835087e-05, + "loss": 0.6926, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9116681886674011, + "eval_loss": 0.8057974576950073, + "eval_precision": 0.9100250369756386, + "eval_recall": 0.9138026607538803, + "eval_runtime": 8.3623, + "eval_samples_per_second": 431.458, + "eval_steps_per_second": 3.468, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.818827605611632e-05, + "loss": 0.692, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9091754803296253, + "eval_loss": 0.8123637437820435, + "eval_precision": 0.9090244278880881, + "eval_recall": 0.9157427937915743, + "eval_runtime": 8.3105, + "eval_samples_per_second": 434.152, + "eval_steps_per_second": 3.49, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.426790774194015e-05, + "loss": 0.6901, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.0782064352566686e-05, + "loss": 0.69, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9159691579551403, + "eval_loss": 0.8035136461257935, + "eval_precision": 0.9139540449721956, + "eval_recall": 0.9204545454545454, + "eval_runtime": 7.9491, + "eval_samples_per_second": 453.887, + "eval_steps_per_second": 3.648, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 7.753622704601885e-06, + "loss": 0.6879, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 5.202457788526748e-06, + "loss": 0.6856, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9229490022172949, + "eval_f1": 0.9184928000354279, + "eval_loss": 0.8032702803611755, + "eval_precision": 0.9156659730174471, + "eval_recall": 0.9229490022172949, + "eval_runtime": 7.8509, + "eval_samples_per_second": 459.563, + "eval_steps_per_second": 3.694, + "step": 573 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.11008857413112771, + "learning_rate": 0.0001609568478970245, + "metric": "eval/loss", + "weight_decay": 0.19641611991207125 + } +} diff --git a/run-pngad7dn/checkpoint-573/training_args.bin b/run-pngad7dn/checkpoint-573/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d3b72370f80cad378439b5a43437d524796c179b --- /dev/null +++ b/run-pngad7dn/checkpoint-573/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d628181c4150e61a6e22dcd512f47ce3bab843109309fa3c04b1d5e767876551 +size 4792 diff --git a/run-pngad7dn/checkpoint-630/model.safetensors b/run-pngad7dn/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3f03ba11522fc68b0d343fdb7742a55086132d10 --- /dev/null +++ b/run-pngad7dn/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a922282c1a1379df66cd1551cabe081a30d45210dc1dd359730de11763bd729 +size 198025308 diff --git a/run-pngad7dn/checkpoint-630/optimizer.pt b/run-pngad7dn/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0e3eca1c065eb9d129a2cc2a81042871f78b754 --- /dev/null +++ b/run-pngad7dn/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee46d10aa2f923d4c4e7afe2ae30b9cb7dc62e90154ff6e633dbd9c8baebc66f +size 395900602 diff --git a/run-pngad7dn/checkpoint-630/rng_state.pth b/run-pngad7dn/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-pngad7dn/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-pngad7dn/checkpoint-630/scheduler.pt b/run-pngad7dn/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b85f2de455da45a4ebc3e78eb6ac618063ef570d --- /dev/null +++ b/run-pngad7dn/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20d9cf84e663b65054c9e2b7f2b4ceee8b7ad38f901525738fb51b7f3fa081a3 +size 1064 diff --git a/run-pngad7dn/checkpoint-630/trainer_state.json b/run-pngad7dn/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c6d9f28a82cc09483ea19acac5f8990316b2283e --- /dev/null +++ b/run-pngad7dn/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9184928000354279, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-pngad7dn/checkpoint-573", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.6606658910010463e-05, + "loss": 1.4682, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.7514814673341468, + "eval_loss": 1.0282491445541382, + "eval_precision": 0.7193932906845331, + "eval_recall": 0.8287139689578714, + "eval_runtime": 8.2294, + "eval_samples_per_second": 438.429, + "eval_steps_per_second": 3.524, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.321331782002093e-05, + "loss": 1.1177, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 4.9819976730031394e-05, + "loss": 0.9319, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8733370288248337, + "eval_f1": 0.8483226682443482, + "eval_loss": 0.9193330407142639, + "eval_precision": 0.8702380163739608, + "eval_recall": 0.8733370288248337, + "eval_runtime": 8.2451, + "eval_samples_per_second": 437.595, + "eval_steps_per_second": 3.517, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 6.642663564004185e-05, + "loss": 0.8585, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9046563192904656, + "eval_f1": 0.8955717583194334, + "eval_loss": 0.8422123193740845, + "eval_precision": 0.892010911011154, + "eval_recall": 0.9046563192904656, + "eval_runtime": 8.4705, + "eval_samples_per_second": 425.951, + "eval_steps_per_second": 3.424, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 8.303329455005233e-05, + "loss": 0.8284, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 9.963995346006279e-05, + "loss": 0.7991, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9024390243902439, + "eval_f1": 0.9006246873810517, + "eval_loss": 0.8324504494667053, + "eval_precision": 0.9024657943075222, + "eval_recall": 0.9024390243902439, + "eval_runtime": 8.3236, + "eval_samples_per_second": 433.468, + "eval_steps_per_second": 3.484, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00011624661237007325, + "loss": 0.7917, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001328532712800837, + "loss": 0.7797, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.9051103857104487, + "eval_loss": 0.8143452405929565, + "eval_precision": 0.9021587956020322, + "eval_recall": 0.9107538802660754, + "eval_runtime": 8.2043, + "eval_samples_per_second": 439.77, + "eval_steps_per_second": 3.535, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0001494599301900942, + "loss": 0.7626, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9114953597438054, + "eval_loss": 0.8027127385139465, + "eval_precision": 0.9102513281911367, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.7819, + "eval_samples_per_second": 463.643, + "eval_steps_per_second": 3.727, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00016093183375431538, + "loss": 0.7651, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00016050542918475926, + "loss": 0.7567, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9085399470909878, + "eval_loss": 0.8093046545982361, + "eval_precision": 0.9068139442944223, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.4107, + "eval_samples_per_second": 428.977, + "eval_steps_per_second": 3.448, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00015955382513897534, + "loss": 0.751, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001580832667831415, + "loss": 0.7476, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8384146341463414, + "eval_f1": 0.853259968721092, + "eval_loss": 0.9248923659324646, + "eval_precision": 0.8896655784208132, + "eval_recall": 0.8384146341463414, + "eval_runtime": 8.2228, + "eval_samples_per_second": 438.779, + "eval_steps_per_second": 3.527, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00015610340506540916, + "loss": 0.7435, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.897901964596072, + "eval_loss": 0.8439856171607971, + "eval_precision": 0.9018191189066853, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.9178, + "eval_samples_per_second": 455.68, + "eval_steps_per_second": 3.663, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00015362723337887212, + "loss": 0.7367, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0001506710022888112, + "loss": 0.7327, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9070764744206813, + "eval_loss": 0.810111939907074, + "eval_precision": 0.9046915596734749, + "eval_recall": 0.9110310421286031, + "eval_runtime": 8.2565, + "eval_samples_per_second": 436.991, + "eval_steps_per_second": 3.512, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0001472541128838405, + "loss": 0.7286, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8392461197339246, + "eval_f1": 0.8537323114802033, + "eval_loss": 0.9132199883460999, + "eval_precision": 0.8875122550876335, + "eval_recall": 0.8392461197339246, + "eval_runtime": 7.9599, + "eval_samples_per_second": 453.274, + "eval_steps_per_second": 3.643, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0001433989894508682, + "loss": 0.7272, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00013913093230947778, + "loss": 0.7201, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9106973075982288, + "eval_loss": 0.8169375658035278, + "eval_precision": 0.9114418308009902, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.8155, + "eval_samples_per_second": 461.649, + "eval_steps_per_second": 3.711, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0001344779517715453, + "loss": 0.7221, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0001294705843157782, + "loss": 0.716, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8763858093126385, + "eval_f1": 0.8846646779862251, + "eval_loss": 0.8635155558586121, + "eval_precision": 0.9023575021310767, + "eval_recall": 0.8763858093126385, + "eval_runtime": 8.2589, + "eval_samples_per_second": 436.863, + "eval_steps_per_second": 3.511, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00012414169218358224, + "loss": 0.7162, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.9043411825346244, + "eval_loss": 0.8228191137313843, + "eval_precision": 0.9055641694671297, + "eval_recall": 0.9068736141906873, + "eval_runtime": 7.8824, + "eval_samples_per_second": 457.726, + "eval_steps_per_second": 3.679, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00011852624771146353, + "loss": 0.711, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00011266110381534513, + "loss": 0.7114, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9096452328159645, + "eval_f1": 0.9081870872669119, + "eval_loss": 0.8182196617126465, + "eval_precision": 0.9126225492200917, + "eval_recall": 0.9096452328159645, + "eval_runtime": 7.9496, + "eval_samples_per_second": 453.86, + "eval_steps_per_second": 3.648, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00010658475213305792, + "loss": 0.7125, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00010033707041226333, + "loss": 0.7103, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.8984492777263368, + "eval_loss": 0.8251257538795471, + "eval_precision": 0.9024246300350663, + "eval_recall": 0.9107538802660754, + "eval_runtime": 8.0824, + "eval_samples_per_second": 446.401, + "eval_steps_per_second": 3.588, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 9.3959060801645e-05, + "loss": 0.7053, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8949556541019955, + "eval_f1": 0.8977127954443364, + "eval_loss": 0.8413810729980469, + "eval_precision": 0.9060704199921686, + "eval_recall": 0.8949556541019955, + "eval_runtime": 7.8946, + "eval_samples_per_second": 457.023, + "eval_steps_per_second": 3.673, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 8.749258076290578e-05, + "loss": 0.704, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 8.098006836953543e-05, + "loss": 0.7016, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9131983798732631, + "eval_loss": 0.807257890701294, + "eval_precision": 0.9122666156944252, + "eval_recall": 0.9171286031042128, + "eval_runtime": 8.0964, + "eval_samples_per_second": 445.631, + "eval_steps_per_second": 3.582, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 7.4464263795152e-05, + "loss": 0.7033, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 6.798792881922688e-05, + "loss": 0.6967, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9085903518695634, + "eval_loss": 0.8192573189735413, + "eval_precision": 0.9109913917882523, + "eval_recall": 0.9088137472283814, + "eval_runtime": 8.2919, + "eval_samples_per_second": 435.123, + "eval_steps_per_second": 3.497, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 6.159356619101567e-05, + "loss": 0.7058, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9073584510258147, + "eval_loss": 0.8104814887046814, + "eval_precision": 0.90734316964549, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.3803, + "eval_samples_per_second": 430.536, + "eval_steps_per_second": 3.461, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 5.532314069344669e-05, + "loss": 0.6986, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 4.921780373756339e-05, + "loss": 0.6931, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.909449000689685, + "eval_loss": 0.8110982775688171, + "eval_precision": 0.9060612571936242, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.8376, + "eval_samples_per_second": 460.344, + "eval_steps_per_second": 3.7, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 4.3317623294945596e-05, + "loss": 0.6962, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9101494514204306, + "eval_loss": 0.8114901185035706, + "eval_precision": 0.9075502600733413, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.2168, + "eval_samples_per_second": 439.1, + "eval_steps_per_second": 3.529, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 3.7661320940503696e-05, + "loss": 0.6951, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 3.2286017731375486e-05, + "loss": 0.6908, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9094854057043816, + "eval_loss": 0.8118507862091064, + "eval_precision": 0.9079458773936686, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.4016, + "eval_samples_per_second": 429.444, + "eval_steps_per_second": 3.452, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.7226990589665792e-05, + "loss": 0.6945, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 2.2517440787835087e-05, + "loss": 0.6926, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9116681886674011, + "eval_loss": 0.8057974576950073, + "eval_precision": 0.9100250369756386, + "eval_recall": 0.9138026607538803, + "eval_runtime": 8.3623, + "eval_samples_per_second": 431.458, + "eval_steps_per_second": 3.468, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.818827605611632e-05, + "loss": 0.692, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9091754803296253, + "eval_loss": 0.8123637437820435, + "eval_precision": 0.9090244278880881, + "eval_recall": 0.9157427937915743, + "eval_runtime": 8.3105, + "eval_samples_per_second": 434.152, + "eval_steps_per_second": 3.49, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.426790774194015e-05, + "loss": 0.6901, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.0782064352566686e-05, + "loss": 0.69, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9159691579551403, + "eval_loss": 0.8035136461257935, + "eval_precision": 0.9139540449721956, + "eval_recall": 0.9204545454545454, + "eval_runtime": 7.9491, + "eval_samples_per_second": 453.887, + "eval_steps_per_second": 3.648, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 7.753622704601885e-06, + "loss": 0.6879, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 5.202457788526748e-06, + "loss": 0.6856, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9229490022172949, + "eval_f1": 0.9184928000354279, + "eval_loss": 0.8032702803611755, + "eval_precision": 0.9156659730174471, + "eval_recall": 0.9229490022172949, + "eval_runtime": 7.8509, + "eval_samples_per_second": 459.563, + "eval_steps_per_second": 3.694, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 3.1453123335454993e-06, + "loss": 0.6894, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9117645142556667, + "eval_loss": 0.8062810897827148, + "eval_precision": 0.9097088506213983, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.6055, + "eval_samples_per_second": 474.393, + "eval_steps_per_second": 3.813, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.5956869287691682e-06, + "loss": 0.6905, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 5.637514218452438e-07, + "loss": 0.6901, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9212860310421286, + "eval_f1": 0.9168016796924301, + "eval_loss": 0.8020651340484619, + "eval_precision": 0.9146905577079981, + "eval_recall": 0.9212860310421286, + "eval_runtime": 8.2822, + "eval_samples_per_second": 435.634, + "eval_steps_per_second": 3.501, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 5.627817650442139e-08, + "loss": 0.6891, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9149707959944082, + "eval_loss": 0.8064096570014954, + "eval_precision": 0.913859998715943, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.7833, + "eval_samples_per_second": 463.557, + "eval_steps_per_second": 3.726, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.11008857413112771, + "learning_rate": 0.0001609568478970245, + "metric": "eval/loss", + "weight_decay": 0.19641611991207125 + } +} diff --git a/run-pngad7dn/checkpoint-630/training_args.bin b/run-pngad7dn/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d3b72370f80cad378439b5a43437d524796c179b --- /dev/null +++ b/run-pngad7dn/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d628181c4150e61a6e22dcd512f47ce3bab843109309fa3c04b1d5e767876551 +size 4792 diff --git a/run-qiksy3ao/checkpoint-1105/model.safetensors b/run-qiksy3ao/checkpoint-1105/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a8c0d4248e8cdfef53da93720c772046241f1f5 --- /dev/null +++ b/run-qiksy3ao/checkpoint-1105/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be5ed75c2c6e782b0e5709163d153b98fe2776c39c44d267f210e7d18e955bad +size 198025308 diff --git a/run-qiksy3ao/checkpoint-1105/optimizer.pt b/run-qiksy3ao/checkpoint-1105/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc9ad4d4ba16d9ce469848dbe01278b73ddf0fc7 --- /dev/null +++ b/run-qiksy3ao/checkpoint-1105/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dff37036ad1f8ad8293c0c527c3614fa743138d129af07e4517c48ad7de934b7 +size 395900602 diff --git a/run-qiksy3ao/checkpoint-1105/rng_state.pth b/run-qiksy3ao/checkpoint-1105/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bfd69dc3eaa0847aabf0337c1ad85f5c3279ee47 --- /dev/null +++ b/run-qiksy3ao/checkpoint-1105/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6f99f80df2e12a39424bcc4a26062a4e6cfd833bf0e3008f84ee50071a2b760 +size 14244 diff --git a/run-qiksy3ao/checkpoint-1105/scheduler.pt b/run-qiksy3ao/checkpoint-1105/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c824eba79b2df27b689efca3fb0ae6958acfef8b --- /dev/null +++ b/run-qiksy3ao/checkpoint-1105/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33ca996a6c32adb2e3e19886f18007582774b48ac9bd65e00affefdc2f6c2027 +size 1064 diff --git a/run-qiksy3ao/checkpoint-1105/trainer_state.json b/run-qiksy3ao/checkpoint-1105/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2972d8cd57ae0edbaf8c5a1eb74acbe5abf32812 --- /dev/null +++ b/run-qiksy3ao/checkpoint-1105/trainer_state.json @@ -0,0 +1,513 @@ +{ + "best_metric": 0.926829268292683, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-qiksy3ao/checkpoint-1105", + "epoch": 26.0, + "eval_steps": 500, + "global_step": 1105, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.51941598887968e-05, + "loss": 1.2211, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8963414634146342, + "eval_loss": 1.0094623565673828, + "eval_runtime": 6.5384, + "eval_samples_per_second": 551.821, + "eval_steps_per_second": 8.718, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001303883197775936, + "loss": 0.8634, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001955824796663904, + "loss": 0.818, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8382272124290466, + "eval_runtime": 6.7164, + "eval_samples_per_second": 537.189, + "eval_steps_per_second": 8.487, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0002607766395551872, + "loss": 0.7969, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8229635953903198, + "eval_runtime": 6.8694, + "eval_samples_per_second": 525.227, + "eval_steps_per_second": 8.298, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00032597079944398396, + "loss": 0.8055, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0003911649593327808, + "loss": 0.79, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8966186252771619, + "eval_loss": 0.8360774517059326, + "eval_runtime": 6.9119, + "eval_samples_per_second": 521.998, + "eval_steps_per_second": 8.247, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0004387617808714065, + "loss": 0.7945, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0004378060366349792, + "loss": 0.7992, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8802660753880266, + "eval_loss": 0.8782799243927002, + "eval_runtime": 6.6313, + "eval_samples_per_second": 544.088, + "eval_steps_per_second": 8.596, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0004356130930699721, + "loss": 0.7925, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8910753880266076, + "eval_loss": 0.8679996132850647, + "eval_runtime": 7.1031, + "eval_samples_per_second": 507.951, + "eval_steps_per_second": 8.025, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00043219537268478567, + "loss": 0.7871, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0004275722360598164, + "loss": 0.7835, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8860864745011087, + "eval_loss": 0.843425989151001, + "eval_runtime": 6.6702, + "eval_samples_per_second": 540.916, + "eval_steps_per_second": 8.546, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0004217698721743322, + "loss": 0.7789, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0004148211500520781, + "loss": 0.7653, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8999445676274944, + "eval_loss": 0.8356900215148926, + "eval_runtime": 6.5923, + "eval_samples_per_second": 547.309, + "eval_steps_per_second": 8.647, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0004067654325660038, + "loss": 0.7657, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8971729490022173, + "eval_loss": 0.8292281031608582, + "eval_runtime": 6.6492, + "eval_samples_per_second": 542.621, + "eval_steps_per_second": 8.572, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0003976483534568672, + "loss": 0.7688, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0003875215588288486, + "loss": 0.7598, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9049334811529933, + "eval_loss": 0.8199973702430725, + "eval_runtime": 6.8747, + "eval_samples_per_second": 524.823, + "eval_steps_per_second": 8.291, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00037644241458654456, + "loss": 0.7486, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8968957871396895, + "eval_loss": 0.8351446986198425, + "eval_runtime": 6.7136, + "eval_samples_per_second": 537.419, + "eval_steps_per_second": 8.49, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00036447368147063955, + "loss": 0.75, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0003516831595331033, + "loss": 0.7443, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8963414634146342, + "eval_loss": 0.8385865092277527, + "eval_runtime": 6.6971, + "eval_samples_per_second": 538.743, + "eval_steps_per_second": 8.511, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00033814330406587757, + "loss": 0.7379, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0003239308151587255, + "loss": 0.7343, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.899390243902439, + "eval_loss": 0.8295668959617615, + "eval_runtime": 6.7914, + "eval_samples_per_second": 531.26, + "eval_steps_per_second": 8.393, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0003091262032113021, + "loss": 0.7334, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8309065103530884, + "eval_runtime": 6.7847, + "eval_samples_per_second": 531.787, + "eval_steps_per_second": 8.401, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00029381333286071704, + "loss": 0.7353, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0002780789479081329, + "loss": 0.7255, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9118625277161863, + "eval_loss": 0.8169041275978088, + "eval_runtime": 6.59, + "eval_samples_per_second": 547.498, + "eval_steps_per_second": 8.649, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00026201217993557853, + "loss": 0.725, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0002457040433965449, + "loss": 0.7212, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8245725631713867, + "eval_runtime": 6.6582, + "eval_samples_per_second": 541.884, + "eval_steps_per_second": 8.561, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00022924692004055967, + "loss": 0.7203, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.8146755695343018, + "eval_runtime": 6.5924, + "eval_samples_per_second": 547.299, + "eval_steps_per_second": 8.646, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0002127340355923511, + "loss": 0.7143, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00019625893165009143, + "loss": 0.7095, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8160279989242554, + "eval_runtime": 6.694, + "eval_samples_per_second": 538.993, + "eval_steps_per_second": 8.515, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00017991493579429209, + "loss": 0.7081, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00016379463290905783, + "loss": 0.7047, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8117980360984802, + "eval_runtime": 6.6552, + "eval_samples_per_second": 542.132, + "eval_steps_per_second": 8.565, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00014798934071054144, + "loss": 0.7016, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8066235780715942, + "eval_runtime": 6.6268, + "eval_samples_per_second": 544.453, + "eval_steps_per_second": 8.601, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00013258859245360622, + "loss": 0.6991, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00011767962974704094, + "loss": 0.6997, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8188757300376892, + "eval_runtime": 6.7268, + "eval_samples_per_second": 536.36, + "eval_steps_per_second": 8.474, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00010334690835040945, + "loss": 0.6971, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8083081245422363, + "eval_runtime": 6.6442, + "eval_samples_per_second": 543.026, + "eval_steps_per_second": 8.579, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 8.967161975207964e-05, + "loss": 0.691, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 7.673123123857975e-05, + "loss": 0.6936, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.8130167126655579, + "eval_runtime": 6.6889, + "eval_samples_per_second": 539.398, + "eval_steps_per_second": 8.522, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 6.45990470606795e-05, + "loss": 0.6944, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 5.33437931820882e-05, + "loss": 0.6901, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9259977827050998, + "eval_loss": 0.7972695231437683, + "eval_runtime": 6.6662, + "eval_samples_per_second": 541.239, + "eval_steps_per_second": 8.551, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 4.302922796306916e-05, + "loss": 0.6951, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8041871786117554, + "eval_runtime": 6.707, + "eval_samples_per_second": 537.947, + "eval_steps_per_second": 8.499, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 3.3713780984357826e-05, + "loss": 0.6884, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.545022205735876e-05, + "loss": 0.688, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.926829268292683, + "eval_loss": 0.7946385741233826, + "eval_runtime": 6.5734, + "eval_samples_per_second": 548.878, + "eval_steps_per_second": 8.671, + "step": 1105 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00043880684540536304, + "metric": "eval/loss", + "warmup_ratio": 0.1383523879423885 + } +} diff --git a/run-qiksy3ao/checkpoint-1105/training_args.bin b/run-qiksy3ao/checkpoint-1105/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bb4b5e5600a5d886aeb54fd51df569700181883f --- /dev/null +++ b/run-qiksy3ao/checkpoint-1105/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3c45f0ef8605ae3a8de1505cf8b5b40c1d907dec3b4b9e8f8560a4f78538475 +size 4792 diff --git a/run-qiksy3ao/checkpoint-1260/model.safetensors b/run-qiksy3ao/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4362a11bb2417c03d0fe3ec205f673daaaa5db07 --- /dev/null +++ b/run-qiksy3ao/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91904a52ef26aab96a423cab9a1b4e1c5ffdc830313ecc5f2301cb6a8bf5f41d +size 198025308 diff --git a/run-qiksy3ao/checkpoint-1260/optimizer.pt b/run-qiksy3ao/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..043762cecfeba0af0324163c83e1c3dfef912958 --- /dev/null +++ b/run-qiksy3ao/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81faa6a6de41532c15de01f6b76e04c0411d5deccca185557c9f1446657efb5a +size 395900602 diff --git a/run-qiksy3ao/checkpoint-1260/rng_state.pth b/run-qiksy3ao/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-qiksy3ao/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-qiksy3ao/checkpoint-1260/scheduler.pt b/run-qiksy3ao/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fef5a174cf5978ab860b5dab835903474ab84ef6 --- /dev/null +++ b/run-qiksy3ao/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d8571c07d472e5c913709adbc0e994944d77d93b8d04868263ec0ffa55e9297 +size 1064 diff --git a/run-qiksy3ao/checkpoint-1260/trainer_state.json b/run-qiksy3ao/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b721f2ad2180e9aca67f449c8fc837206b7bc1e9 --- /dev/null +++ b/run-qiksy3ao/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.926829268292683, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-qiksy3ao/checkpoint-1105", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.51941598887968e-05, + "loss": 1.2211, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8963414634146342, + "eval_loss": 1.0094623565673828, + "eval_runtime": 6.5384, + "eval_samples_per_second": 551.821, + "eval_steps_per_second": 8.718, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001303883197775936, + "loss": 0.8634, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001955824796663904, + "loss": 0.818, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8382272124290466, + "eval_runtime": 6.7164, + "eval_samples_per_second": 537.189, + "eval_steps_per_second": 8.487, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0002607766395551872, + "loss": 0.7969, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8229635953903198, + "eval_runtime": 6.8694, + "eval_samples_per_second": 525.227, + "eval_steps_per_second": 8.298, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00032597079944398396, + "loss": 0.8055, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0003911649593327808, + "loss": 0.79, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8966186252771619, + "eval_loss": 0.8360774517059326, + "eval_runtime": 6.9119, + "eval_samples_per_second": 521.998, + "eval_steps_per_second": 8.247, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0004387617808714065, + "loss": 0.7945, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0004378060366349792, + "loss": 0.7992, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8802660753880266, + "eval_loss": 0.8782799243927002, + "eval_runtime": 6.6313, + "eval_samples_per_second": 544.088, + "eval_steps_per_second": 8.596, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0004356130930699721, + "loss": 0.7925, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8910753880266076, + "eval_loss": 0.8679996132850647, + "eval_runtime": 7.1031, + "eval_samples_per_second": 507.951, + "eval_steps_per_second": 8.025, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00043219537268478567, + "loss": 0.7871, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0004275722360598164, + "loss": 0.7835, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8860864745011087, + "eval_loss": 0.843425989151001, + "eval_runtime": 6.6702, + "eval_samples_per_second": 540.916, + "eval_steps_per_second": 8.546, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0004217698721743322, + "loss": 0.7789, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0004148211500520781, + "loss": 0.7653, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8999445676274944, + "eval_loss": 0.8356900215148926, + "eval_runtime": 6.5923, + "eval_samples_per_second": 547.309, + "eval_steps_per_second": 8.647, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0004067654325660038, + "loss": 0.7657, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8971729490022173, + "eval_loss": 0.8292281031608582, + "eval_runtime": 6.6492, + "eval_samples_per_second": 542.621, + "eval_steps_per_second": 8.572, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0003976483534568672, + "loss": 0.7688, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0003875215588288486, + "loss": 0.7598, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9049334811529933, + "eval_loss": 0.8199973702430725, + "eval_runtime": 6.8747, + "eval_samples_per_second": 524.823, + "eval_steps_per_second": 8.291, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00037644241458654456, + "loss": 0.7486, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8968957871396895, + "eval_loss": 0.8351446986198425, + "eval_runtime": 6.7136, + "eval_samples_per_second": 537.419, + "eval_steps_per_second": 8.49, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00036447368147063955, + "loss": 0.75, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0003516831595331033, + "loss": 0.7443, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8963414634146342, + "eval_loss": 0.8385865092277527, + "eval_runtime": 6.6971, + "eval_samples_per_second": 538.743, + "eval_steps_per_second": 8.511, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00033814330406587757, + "loss": 0.7379, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0003239308151587255, + "loss": 0.7343, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.899390243902439, + "eval_loss": 0.8295668959617615, + "eval_runtime": 6.7914, + "eval_samples_per_second": 531.26, + "eval_steps_per_second": 8.393, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0003091262032113021, + "loss": 0.7334, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8309065103530884, + "eval_runtime": 6.7847, + "eval_samples_per_second": 531.787, + "eval_steps_per_second": 8.401, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00029381333286071704, + "loss": 0.7353, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0002780789479081329, + "loss": 0.7255, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9118625277161863, + "eval_loss": 0.8169041275978088, + "eval_runtime": 6.59, + "eval_samples_per_second": 547.498, + "eval_steps_per_second": 8.649, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00026201217993557853, + "loss": 0.725, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0002457040433965449, + "loss": 0.7212, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8245725631713867, + "eval_runtime": 6.6582, + "eval_samples_per_second": 541.884, + "eval_steps_per_second": 8.561, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00022924692004055967, + "loss": 0.7203, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.8146755695343018, + "eval_runtime": 6.5924, + "eval_samples_per_second": 547.299, + "eval_steps_per_second": 8.646, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0002127340355923511, + "loss": 0.7143, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00019625893165009143, + "loss": 0.7095, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8160279989242554, + "eval_runtime": 6.694, + "eval_samples_per_second": 538.993, + "eval_steps_per_second": 8.515, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00017991493579429209, + "loss": 0.7081, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00016379463290905783, + "loss": 0.7047, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8117980360984802, + "eval_runtime": 6.6552, + "eval_samples_per_second": 542.132, + "eval_steps_per_second": 8.565, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00014798934071054144, + "loss": 0.7016, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8066235780715942, + "eval_runtime": 6.6268, + "eval_samples_per_second": 544.453, + "eval_steps_per_second": 8.601, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00013258859245360622, + "loss": 0.6991, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00011767962974704094, + "loss": 0.6997, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8188757300376892, + "eval_runtime": 6.7268, + "eval_samples_per_second": 536.36, + "eval_steps_per_second": 8.474, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00010334690835040945, + "loss": 0.6971, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8083081245422363, + "eval_runtime": 6.6442, + "eval_samples_per_second": 543.026, + "eval_steps_per_second": 8.579, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 8.967161975207964e-05, + "loss": 0.691, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 7.673123123857975e-05, + "loss": 0.6936, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.8130167126655579, + "eval_runtime": 6.6889, + "eval_samples_per_second": 539.398, + "eval_steps_per_second": 8.522, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 6.45990470606795e-05, + "loss": 0.6944, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 5.33437931820882e-05, + "loss": 0.6901, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9259977827050998, + "eval_loss": 0.7972695231437683, + "eval_runtime": 6.6662, + "eval_samples_per_second": 541.239, + "eval_steps_per_second": 8.551, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 4.302922796306916e-05, + "loss": 0.6951, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8041871786117554, + "eval_runtime": 6.707, + "eval_samples_per_second": 537.947, + "eval_steps_per_second": 8.499, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 3.3713780984357826e-05, + "loss": 0.6884, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.545022205735876e-05, + "loss": 0.688, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.926829268292683, + "eval_loss": 0.7946385741233826, + "eval_runtime": 6.5734, + "eval_samples_per_second": 548.878, + "eval_steps_per_second": 8.671, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 1.828536229560263e-05, + "loss": 0.6873, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.2259788940821519e-05, + "loss": 0.6838, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9221175166297118, + "eval_loss": 0.7999536991119385, + "eval_runtime": 6.7274, + "eval_samples_per_second": 536.318, + "eval_steps_per_second": 8.473, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 7.407635445791622e-06, + "loss": 0.6858, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.926829268292683, + "eval_loss": 0.7966208457946777, + "eval_runtime": 6.6685, + "eval_samples_per_second": 541.049, + "eval_steps_per_second": 8.548, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 3.7563881163685955e-06, + "loss": 0.691, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.3267304080436547e-06, + "loss": 0.6888, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9246119733924612, + "eval_loss": 0.800605058670044, + "eval_runtime": 6.6706, + "eval_samples_per_second": 540.88, + "eval_steps_per_second": 8.545, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 1.324257590435512e-07, + "loss": 0.6853, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9207317073170732, + "eval_loss": 0.800426185131073, + "eval_runtime": 6.6371, + "eval_samples_per_second": 543.614, + "eval_steps_per_second": 8.588, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00043880684540536304, + "metric": "eval/loss", + "warmup_ratio": 0.1383523879423885 + } +} diff --git a/run-qiksy3ao/checkpoint-1260/training_args.bin b/run-qiksy3ao/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bb4b5e5600a5d886aeb54fd51df569700181883f --- /dev/null +++ b/run-qiksy3ao/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3c45f0ef8605ae3a8de1505cf8b5b40c1d907dec3b4b9e8f8560a4f78538475 +size 4792 diff --git a/run-qsxugvq5/checkpoint-616/model.safetensors b/run-qsxugvq5/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7618f260e0c682d4d4bf634c92c552ad48c9832a --- /dev/null +++ b/run-qsxugvq5/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23e031c2f1052e5e578238949847d15d19eb19a0ca141566675fadabcbf91a95 +size 198025308 diff --git a/run-qsxugvq5/checkpoint-616/optimizer.pt b/run-qsxugvq5/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..61637438720d760a06dacc337820fae8b8d32dfe --- /dev/null +++ b/run-qsxugvq5/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7d53c95934e81123580263c2bd2895ac114b2dda6062764a9e1100d7cd1740c +size 395900602 diff --git a/run-qsxugvq5/checkpoint-616/rng_state.pth b/run-qsxugvq5/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-qsxugvq5/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-qsxugvq5/checkpoint-616/scheduler.pt b/run-qsxugvq5/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..47fc8c35e9ce6174364824ed6d0f4937f3957f65 --- /dev/null +++ b/run-qsxugvq5/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:353908ed8ff4f0078d4eb43a00c73d9265a6561fa4d94bab4052c1f228c268ea +size 1064 diff --git a/run-qsxugvq5/checkpoint-616/trainer_state.json b/run-qsxugvq5/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8c4dc8a7349c69294d6ab4f2d72a06e6a51c47bf --- /dev/null +++ b/run-qsxugvq5/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9183377005731362, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-qsxugvq5/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.2659723590324755e-05, + "loss": 1.4844, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8273281596452328, + "eval_f1": 0.7504958631804555, + "eval_loss": 1.093916893005371, + "eval_precision": 0.686721429930464, + "eval_recall": 0.8273281596452328, + "eval_runtime": 8.4994, + "eval_samples_per_second": 424.501, + "eval_steps_per_second": 3.412, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 2.531944718064951e-05, + "loss": 1.1806, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 3.797917077097427e-05, + "loss": 0.9496, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.864190687361419, + "eval_f1": 0.8275009858077613, + "eval_loss": 0.9336466789245605, + "eval_precision": 0.8520663583165606, + "eval_recall": 0.864190687361419, + "eval_runtime": 8.1808, + "eval_samples_per_second": 441.035, + "eval_steps_per_second": 3.545, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 5.063889436129902e-05, + "loss": 0.8756, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.8901114321856429, + "eval_loss": 0.8596798777580261, + "eval_precision": 0.8897549839809881, + "eval_recall": 0.8991130820399114, + "eval_runtime": 8.1201, + "eval_samples_per_second": 444.331, + "eval_steps_per_second": 3.571, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 6.32986179516238e-05, + "loss": 0.8425, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 7.595834154194854e-05, + "loss": 0.8067, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9046812662021747, + "eval_loss": 0.8218482732772827, + "eval_precision": 0.9012847299215914, + "eval_recall": 0.9104767184035477, + "eval_runtime": 8.256, + "eval_samples_per_second": 437.018, + "eval_steps_per_second": 3.513, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 8.86180651322733e-05, + "loss": 0.7992, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010127778872259804, + "loss": 0.7831, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8891352549889135, + "eval_f1": 0.890726798602753, + "eval_loss": 0.8415526747703552, + "eval_precision": 0.897154734238122, + "eval_recall": 0.8891352549889135, + "eval_runtime": 8.1743, + "eval_samples_per_second": 441.382, + "eval_steps_per_second": 3.548, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011393751231292281, + "loss": 0.768, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9082594235033259, + "eval_f1": 0.8969346088398956, + "eval_loss": 0.8096609711647034, + "eval_precision": 0.8984680470979992, + "eval_recall": 0.9082594235033259, + "eval_runtime": 8.0773, + "eval_samples_per_second": 446.686, + "eval_steps_per_second": 3.59, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00012268286735181967, + "loss": 0.77, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001223578071444966, + "loss": 0.7618, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9088604032169263, + "eval_loss": 0.8173738121986389, + "eval_precision": 0.907633448779218, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.0448, + "eval_samples_per_second": 448.491, + "eval_steps_per_second": 3.605, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00012163237259126463, + "loss": 0.7558, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001205113245581123, + "loss": 0.7523, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8932926829268293, + "eval_f1": 0.8933864550762298, + "eval_loss": 0.8372663855552673, + "eval_precision": 0.896811138712137, + "eval_recall": 0.8932926829268293, + "eval_runtime": 8.3513, + "eval_samples_per_second": 432.031, + "eval_steps_per_second": 3.473, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00011900202023450459, + "loss": 0.7483, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9125548232908172, + "eval_loss": 0.8059051632881165, + "eval_precision": 0.9088150905572628, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.9583, + "eval_samples_per_second": 453.364, + "eval_steps_per_second": 3.644, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00011711436484978113, + "loss": 0.7409, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0001148607466673341, + "loss": 0.7368, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9054878048780488, + "eval_f1": 0.9031884942137173, + "eval_loss": 0.814873456954956, + "eval_precision": 0.9025191489702079, + "eval_recall": 0.9054878048780488, + "eval_runtime": 8.2659, + "eval_samples_per_second": 436.494, + "eval_steps_per_second": 3.508, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00011225595568318477, + "loss": 0.7298, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8999445676274944, + "eval_f1": 0.9003114693023188, + "eval_loss": 0.831853449344635, + "eval_precision": 0.9029294235528434, + "eval_recall": 0.8999445676274944, + "eval_runtime": 8.43, + "eval_samples_per_second": 427.995, + "eval_steps_per_second": 3.44, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00010931708656252174, + "loss": 0.7297, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00010606342645120679, + "loss": 0.724, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.902602842894861, + "eval_loss": 0.8179815411567688, + "eval_precision": 0.9010693862159096, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.2256, + "eval_samples_per_second": 438.629, + "eval_steps_per_second": 3.526, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00010251632839851675, + "loss": 0.7252, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 9.86990712218196e-05, + "loss": 0.7176, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9172777932187847, + "eval_loss": 0.8009869456291199, + "eval_precision": 0.9155348154064488, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.7582, + "eval_samples_per_second": 465.059, + "eval_steps_per_second": 3.738, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 9.463670673286202e-05, + "loss": 0.7194, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9070264678420726, + "eval_loss": 0.8117612600326538, + "eval_precision": 0.907922310963724, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.4543, + "eval_samples_per_second": 426.765, + "eval_steps_per_second": 3.43, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 9.035589532828823e-05, + "loss": 0.7141, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 8.588473102337311e-05, + "loss": 0.7124, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9138366205164367, + "eval_loss": 0.7996091842651367, + "eval_precision": 0.9129615729787861, + "eval_recall": 0.9185144124168514, + "eval_runtime": 8.0467, + "eval_samples_per_second": 448.384, + "eval_steps_per_second": 3.604, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 8.125255707723451e-05, + "loss": 0.7125, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 7.648977341953522e-05, + "loss": 0.7116, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.897024149874158, + "eval_loss": 0.824866533279419, + "eval_precision": 0.9032378295128017, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.8472, + "eval_samples_per_second": 459.782, + "eval_steps_per_second": 3.696, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 7.162763714249092e-05, + "loss": 0.7081, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9046563192904656, + "eval_f1": 0.9057912202810251, + "eval_loss": 0.8238525390625, + "eval_precision": 0.9085258591408971, + "eval_recall": 0.9046563192904656, + "eval_runtime": 7.9373, + "eval_samples_per_second": 454.561, + "eval_steps_per_second": 3.654, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 6.669805736751019e-05, + "loss": 0.7068, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 6.173338583271193e-05, + "loss": 0.7012, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9152098989872453, + "eval_loss": 0.8057020902633667, + "eval_precision": 0.9130650088576332, + "eval_recall": 0.9185144124168514, + "eval_runtime": 8.2388, + "eval_samples_per_second": 437.928, + "eval_steps_per_second": 3.52, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 5.676620457564739e-05, + "loss": 0.705, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 5.1829112104618064e-05, + "loss": 0.7011, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.912585785327524, + "eval_loss": 0.8089290857315063, + "eval_precision": 0.9123814360924859, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.3505, + "eval_samples_per_second": 432.071, + "eval_steps_per_second": 3.473, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 4.6954509461899884e-05, + "loss": 0.7053, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.908041058385437, + "eval_loss": 0.8054780960083008, + "eval_precision": 0.9053392230709744, + "eval_recall": 0.9151884700665188, + "eval_runtime": 7.8721, + "eval_samples_per_second": 458.325, + "eval_steps_per_second": 3.684, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.217438758289287e-05, + "loss": 0.6981, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.7520117346711524e-05, + "loss": 0.6948, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9116860395675993, + "eval_loss": 0.813570499420166, + "eval_precision": 0.9093273606451884, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.273, + "eval_samples_per_second": 436.12, + "eval_steps_per_second": 3.505, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.302224369606676e-05, + "loss": 0.6983, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9110358094190079, + "eval_loss": 0.8097633123397827, + "eval_precision": 0.9105008972244054, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.8632, + "eval_samples_per_second": 458.844, + "eval_steps_per_second": 3.688, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.8710285177585185e-05, + "loss": 0.6973, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.4612540218138317e-05, + "loss": 0.6935, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9102056754848727, + "eval_loss": 0.8103015422821045, + "eval_precision": 0.9082514700131558, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.2979, + "eval_samples_per_second": 434.807, + "eval_steps_per_second": 3.495, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.0755901408546784e-05, + "loss": 0.6966, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.7165679013474546e-05, + "loss": 0.6946, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9134701172006597, + "eval_loss": 0.8035517930984497, + "eval_precision": 0.912091177669125, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.9686, + "eval_samples_per_second": 452.777, + "eval_steps_per_second": 3.639, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.3865434865778766e-05, + "loss": 0.6938, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9114712313330997, + "eval_loss": 0.8103901743888855, + "eval_precision": 0.9105500475418064, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.9027, + "eval_samples_per_second": 456.551, + "eval_steps_per_second": 3.67, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.0876827735429361e-05, + "loss": 0.6942, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 8.219471187808127e-06, + "loss": 0.6918, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9151006740570711, + "eval_loss": 0.7972990274429321, + "eval_precision": 0.9136796145206859, + "eval_recall": 0.91990022172949, + "eval_runtime": 8.0194, + "eval_samples_per_second": 449.907, + "eval_steps_per_second": 3.616, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 5.910804864231674e-06, + "loss": 0.6903, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.965980029455506e-06, + "loss": 0.6889, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9212860310421286, + "eval_f1": 0.9175736418884731, + "eval_loss": 0.802456259727478, + "eval_precision": 0.9159691382573251, + "eval_recall": 0.9212860310421286, + "eval_runtime": 8.2688, + "eval_samples_per_second": 436.337, + "eval_steps_per_second": 3.507, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.3977601372858133e-06, + "loss": 0.6926, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9130911696369062, + "eval_loss": 0.8071355819702148, + "eval_precision": 0.9112115622408294, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.1166, + "eval_samples_per_second": 444.523, + "eval_steps_per_second": 3.573, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.216437066864473e-06, + "loss": 0.6934, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 4.297635793501619e-07, + "loss": 0.6931, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9232261640798226, + "eval_f1": 0.9183377005731362, + "eval_loss": 0.7986037135124207, + "eval_precision": 0.9151930001236958, + "eval_recall": 0.9232261640798226, + "eval_runtime": 8.2086, + "eval_samples_per_second": 439.538, + "eval_steps_per_second": 3.533, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.1861694065618728, + "learning_rate": 0.0001227019363369938, + "metric": "eval/loss", + "weight_decay": 0.0979868474987772 + } +} diff --git a/run-qsxugvq5/checkpoint-616/training_args.bin b/run-qsxugvq5/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3b37479a1223dab8fdc84848c66c970a370595bb --- /dev/null +++ b/run-qsxugvq5/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d999087d6ef5d37bda5853e27cab57448b3ee31a4dcddf61ab561384037c380d +size 4792 diff --git a/run-qsxugvq5/checkpoint-630/model.safetensors b/run-qsxugvq5/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..65e74d89c737a8c53a23b84f9a342758d328ba18 --- /dev/null +++ b/run-qsxugvq5/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e35e9a8f76a391c1e866ebf955cd5983dc7624ffbeae5cf7885d24a13d26a0f3 +size 198025308 diff --git a/run-qsxugvq5/checkpoint-630/optimizer.pt b/run-qsxugvq5/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4529c1a53a6780f20834ac8f4fe330f4a2ca32e9 --- /dev/null +++ b/run-qsxugvq5/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e70aaeb93624c6cd83790a6f354e40e49f138c00c5d072bc7f6e8eb5ab7ed67a +size 395900602 diff --git a/run-qsxugvq5/checkpoint-630/rng_state.pth b/run-qsxugvq5/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-qsxugvq5/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-qsxugvq5/checkpoint-630/scheduler.pt b/run-qsxugvq5/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..465967a33a6ade616daa442a7df11f47d4eeef83 --- /dev/null +++ b/run-qsxugvq5/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2bdb3a18c4b7d5f8d3cfe7a9bf9945f38cb60e26874474d5131aa8e572d74f3 +size 1064 diff --git a/run-qsxugvq5/checkpoint-630/trainer_state.json b/run-qsxugvq5/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8426230bda4909a73cea4de3410feefb1c3943c8 --- /dev/null +++ b/run-qsxugvq5/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9183377005731362, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-qsxugvq5/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.2659723590324755e-05, + "loss": 1.4844, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8273281596452328, + "eval_f1": 0.7504958631804555, + "eval_loss": 1.093916893005371, + "eval_precision": 0.686721429930464, + "eval_recall": 0.8273281596452328, + "eval_runtime": 8.4994, + "eval_samples_per_second": 424.501, + "eval_steps_per_second": 3.412, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 2.531944718064951e-05, + "loss": 1.1806, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 3.797917077097427e-05, + "loss": 0.9496, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.864190687361419, + "eval_f1": 0.8275009858077613, + "eval_loss": 0.9336466789245605, + "eval_precision": 0.8520663583165606, + "eval_recall": 0.864190687361419, + "eval_runtime": 8.1808, + "eval_samples_per_second": 441.035, + "eval_steps_per_second": 3.545, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 5.063889436129902e-05, + "loss": 0.8756, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8991130820399114, + "eval_f1": 0.8901114321856429, + "eval_loss": 0.8596798777580261, + "eval_precision": 0.8897549839809881, + "eval_recall": 0.8991130820399114, + "eval_runtime": 8.1201, + "eval_samples_per_second": 444.331, + "eval_steps_per_second": 3.571, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 6.32986179516238e-05, + "loss": 0.8425, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 7.595834154194854e-05, + "loss": 0.8067, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9046812662021747, + "eval_loss": 0.8218482732772827, + "eval_precision": 0.9012847299215914, + "eval_recall": 0.9104767184035477, + "eval_runtime": 8.256, + "eval_samples_per_second": 437.018, + "eval_steps_per_second": 3.513, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 8.86180651322733e-05, + "loss": 0.7992, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010127778872259804, + "loss": 0.7831, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8891352549889135, + "eval_f1": 0.890726798602753, + "eval_loss": 0.8415526747703552, + "eval_precision": 0.897154734238122, + "eval_recall": 0.8891352549889135, + "eval_runtime": 8.1743, + "eval_samples_per_second": 441.382, + "eval_steps_per_second": 3.548, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011393751231292281, + "loss": 0.768, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9082594235033259, + "eval_f1": 0.8969346088398956, + "eval_loss": 0.8096609711647034, + "eval_precision": 0.8984680470979992, + "eval_recall": 0.9082594235033259, + "eval_runtime": 8.0773, + "eval_samples_per_second": 446.686, + "eval_steps_per_second": 3.59, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00012268286735181967, + "loss": 0.77, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001223578071444966, + "loss": 0.7618, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9088604032169263, + "eval_loss": 0.8173738121986389, + "eval_precision": 0.907633448779218, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.0448, + "eval_samples_per_second": 448.491, + "eval_steps_per_second": 3.605, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00012163237259126463, + "loss": 0.7558, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001205113245581123, + "loss": 0.7523, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8932926829268293, + "eval_f1": 0.8933864550762298, + "eval_loss": 0.8372663855552673, + "eval_precision": 0.896811138712137, + "eval_recall": 0.8932926829268293, + "eval_runtime": 8.3513, + "eval_samples_per_second": 432.031, + "eval_steps_per_second": 3.473, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00011900202023450459, + "loss": 0.7483, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9125548232908172, + "eval_loss": 0.8059051632881165, + "eval_precision": 0.9088150905572628, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.9583, + "eval_samples_per_second": 453.364, + "eval_steps_per_second": 3.644, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00011711436484978113, + "loss": 0.7409, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0001148607466673341, + "loss": 0.7368, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9054878048780488, + "eval_f1": 0.9031884942137173, + "eval_loss": 0.814873456954956, + "eval_precision": 0.9025191489702079, + "eval_recall": 0.9054878048780488, + "eval_runtime": 8.2659, + "eval_samples_per_second": 436.494, + "eval_steps_per_second": 3.508, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00011225595568318477, + "loss": 0.7298, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8999445676274944, + "eval_f1": 0.9003114693023188, + "eval_loss": 0.831853449344635, + "eval_precision": 0.9029294235528434, + "eval_recall": 0.8999445676274944, + "eval_runtime": 8.43, + "eval_samples_per_second": 427.995, + "eval_steps_per_second": 3.44, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00010931708656252174, + "loss": 0.7297, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00010606342645120679, + "loss": 0.724, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.902602842894861, + "eval_loss": 0.8179815411567688, + "eval_precision": 0.9010693862159096, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.2256, + "eval_samples_per_second": 438.629, + "eval_steps_per_second": 3.526, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00010251632839851675, + "loss": 0.7252, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 9.86990712218196e-05, + "loss": 0.7176, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9172777932187847, + "eval_loss": 0.8009869456291199, + "eval_precision": 0.9155348154064488, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.7582, + "eval_samples_per_second": 465.059, + "eval_steps_per_second": 3.738, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 9.463670673286202e-05, + "loss": 0.7194, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9070264678420726, + "eval_loss": 0.8117612600326538, + "eval_precision": 0.907922310963724, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.4543, + "eval_samples_per_second": 426.765, + "eval_steps_per_second": 3.43, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 9.035589532828823e-05, + "loss": 0.7141, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 8.588473102337311e-05, + "loss": 0.7124, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9138366205164367, + "eval_loss": 0.7996091842651367, + "eval_precision": 0.9129615729787861, + "eval_recall": 0.9185144124168514, + "eval_runtime": 8.0467, + "eval_samples_per_second": 448.384, + "eval_steps_per_second": 3.604, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 8.125255707723451e-05, + "loss": 0.7125, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 7.648977341953522e-05, + "loss": 0.7116, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.897024149874158, + "eval_loss": 0.824866533279419, + "eval_precision": 0.9032378295128017, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.8472, + "eval_samples_per_second": 459.782, + "eval_steps_per_second": 3.696, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 7.162763714249092e-05, + "loss": 0.7081, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9046563192904656, + "eval_f1": 0.9057912202810251, + "eval_loss": 0.8238525390625, + "eval_precision": 0.9085258591408971, + "eval_recall": 0.9046563192904656, + "eval_runtime": 7.9373, + "eval_samples_per_second": 454.561, + "eval_steps_per_second": 3.654, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 6.669805736751019e-05, + "loss": 0.7068, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 6.173338583271193e-05, + "loss": 0.7012, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9152098989872453, + "eval_loss": 0.8057020902633667, + "eval_precision": 0.9130650088576332, + "eval_recall": 0.9185144124168514, + "eval_runtime": 8.2388, + "eval_samples_per_second": 437.928, + "eval_steps_per_second": 3.52, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 5.676620457564739e-05, + "loss": 0.705, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 5.1829112104618064e-05, + "loss": 0.7011, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.912585785327524, + "eval_loss": 0.8089290857315063, + "eval_precision": 0.9123814360924859, + "eval_recall": 0.9165742793791575, + "eval_runtime": 8.3505, + "eval_samples_per_second": 432.071, + "eval_steps_per_second": 3.473, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 4.6954509461899884e-05, + "loss": 0.7053, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.908041058385437, + "eval_loss": 0.8054780960083008, + "eval_precision": 0.9053392230709744, + "eval_recall": 0.9151884700665188, + "eval_runtime": 7.8721, + "eval_samples_per_second": 458.325, + "eval_steps_per_second": 3.684, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.217438758289287e-05, + "loss": 0.6981, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 3.7520117346711524e-05, + "loss": 0.6948, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9116860395675993, + "eval_loss": 0.813570499420166, + "eval_precision": 0.9093273606451884, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.273, + "eval_samples_per_second": 436.12, + "eval_steps_per_second": 3.505, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.302224369606676e-05, + "loss": 0.6983, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9110358094190079, + "eval_loss": 0.8097633123397827, + "eval_precision": 0.9105008972244054, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.8632, + "eval_samples_per_second": 458.844, + "eval_steps_per_second": 3.688, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 2.8710285177585185e-05, + "loss": 0.6973, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.4612540218138317e-05, + "loss": 0.6935, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9102056754848727, + "eval_loss": 0.8103015422821045, + "eval_precision": 0.9082514700131558, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.2979, + "eval_samples_per_second": 434.807, + "eval_steps_per_second": 3.495, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.0755901408546784e-05, + "loss": 0.6966, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.7165679013474546e-05, + "loss": 0.6946, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9134701172006597, + "eval_loss": 0.8035517930984497, + "eval_precision": 0.912091177669125, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.9686, + "eval_samples_per_second": 452.777, + "eval_steps_per_second": 3.639, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.3865434865778766e-05, + "loss": 0.6938, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9114712313330997, + "eval_loss": 0.8103901743888855, + "eval_precision": 0.9105500475418064, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.9027, + "eval_samples_per_second": 456.551, + "eval_steps_per_second": 3.67, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.0876827735429361e-05, + "loss": 0.6942, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 8.219471187808127e-06, + "loss": 0.6918, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9151006740570711, + "eval_loss": 0.7972990274429321, + "eval_precision": 0.9136796145206859, + "eval_recall": 0.91990022172949, + "eval_runtime": 8.0194, + "eval_samples_per_second": 449.907, + "eval_steps_per_second": 3.616, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 5.910804864231674e-06, + "loss": 0.6903, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 3.965980029455506e-06, + "loss": 0.6889, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9212860310421286, + "eval_f1": 0.9175736418884731, + "eval_loss": 0.802456259727478, + "eval_precision": 0.9159691382573251, + "eval_recall": 0.9212860310421286, + "eval_runtime": 8.2688, + "eval_samples_per_second": 436.337, + "eval_steps_per_second": 3.507, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.3977601372858133e-06, + "loss": 0.6926, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9130911696369062, + "eval_loss": 0.8071355819702148, + "eval_precision": 0.9112115622408294, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.1166, + "eval_samples_per_second": 444.523, + "eval_steps_per_second": 3.573, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.216437066864473e-06, + "loss": 0.6934, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 4.297635793501619e-07, + "loss": 0.6931, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9232261640798226, + "eval_f1": 0.9183377005731362, + "eval_loss": 0.7986037135124207, + "eval_precision": 0.9151930001236958, + "eval_recall": 0.9232261640798226, + "eval_runtime": 8.2086, + "eval_samples_per_second": 439.538, + "eval_steps_per_second": 3.533, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 4.2902438267339285e-08, + "loss": 0.6932, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9123621161266143, + "eval_loss": 0.8079580068588257, + "eval_precision": 0.9112020371503624, + "eval_recall": 0.9171286031042128, + "eval_runtime": 8.1976, + "eval_samples_per_second": 440.126, + "eval_steps_per_second": 3.538, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.1861694065618728, + "learning_rate": 0.0001227019363369938, + "metric": "eval/loss", + "weight_decay": 0.0979868474987772 + } +} diff --git a/run-qsxugvq5/checkpoint-630/training_args.bin b/run-qsxugvq5/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3b37479a1223dab8fdc84848c66c970a370595bb --- /dev/null +++ b/run-qsxugvq5/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d999087d6ef5d37bda5853e27cab57448b3ee31a4dcddf61ab561384037c380d +size 4792 diff --git a/run-qulonkt5/checkpoint-1190/model.safetensors b/run-qulonkt5/checkpoint-1190/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2fb861ca0d2fcbb0b246a170b62713ac0b30513e --- /dev/null +++ b/run-qulonkt5/checkpoint-1190/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0720da520614cb24a57ed29b36eacbdb5e579636763c9f183af2ad661ce28b0 +size 198025308 diff --git a/run-qulonkt5/checkpoint-1190/optimizer.pt b/run-qulonkt5/checkpoint-1190/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..481fa9cd2ff764b93ab1237747afa40aa0f394ed --- /dev/null +++ b/run-qulonkt5/checkpoint-1190/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e93a6983976d5b5605771418e5c5bb6eab764e25ab9544f47a336ff838de62d0 +size 395900602 diff --git a/run-qulonkt5/checkpoint-1190/rng_state.pth b/run-qulonkt5/checkpoint-1190/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fa10329c52a02715f873c9a50812feb7d32c8cd3 --- /dev/null +++ b/run-qulonkt5/checkpoint-1190/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43f5febab37757cc5268c77056c937c9c526090d892464a785cf2004d48e5d85 +size 14244 diff --git a/run-qulonkt5/checkpoint-1190/scheduler.pt b/run-qulonkt5/checkpoint-1190/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6cb7650c3f02156bcfe1c2df5cadd4c17e08ddb7 --- /dev/null +++ b/run-qulonkt5/checkpoint-1190/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d61291cdc22faca8b65d2165fba0f3a7d8be23845ed97ac033969edf9198741 +size 1064 diff --git a/run-qulonkt5/checkpoint-1190/trainer_state.json b/run-qulonkt5/checkpoint-1190/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..56fc936995136f4fe8601f36b8500e0170b15a51 --- /dev/null +++ b/run-qulonkt5/checkpoint-1190/trainer_state.json @@ -0,0 +1,549 @@ +{ + "best_metric": 0.9254434589800443, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-qulonkt5/checkpoint-1190", + "epoch": 28.0, + "eval_steps": 500, + "global_step": 1190, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.4180777735918655e-05, + "loss": 1.3494, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8495011086474501, + "eval_loss": 0.9360548257827759, + "eval_runtime": 6.8004, + "eval_samples_per_second": 530.556, + "eval_steps_per_second": 8.382, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 4.836155547183731e-05, + "loss": 0.9447, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 7.254233320775595e-05, + "loss": 0.8484, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8134665489196777, + "eval_runtime": 6.7577, + "eval_samples_per_second": 533.913, + "eval_steps_per_second": 8.435, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 9.672311094367462e-05, + "loss": 0.809, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9007760532150776, + "eval_loss": 0.8324153423309326, + "eval_runtime": 6.902, + "eval_samples_per_second": 522.744, + "eval_steps_per_second": 8.258, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00012090388867959327, + "loss": 0.7933, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001450846664155119, + "loss": 0.783, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8505873680114746, + "eval_runtime": 6.737, + "eval_samples_per_second": 535.549, + "eval_steps_per_second": 8.461, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00016647219083812365, + "loss": 0.7768, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00016617990901627322, + "loss": 0.7737, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8140047788619995, + "eval_runtime": 6.9103, + "eval_samples_per_second": 522.12, + "eval_steps_per_second": 8.249, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0001654142970884026, + "loss": 0.7648, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9024390243902439, + "eval_loss": 0.8320362567901611, + "eval_runtime": 6.9755, + "eval_samples_per_second": 517.238, + "eval_steps_per_second": 8.171, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00016417972420599327, + "loss": 0.7551, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00016248323576076624, + "loss": 0.7513, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8033508062362671, + "eval_runtime": 6.5873, + "eval_samples_per_second": 547.723, + "eval_steps_per_second": 8.653, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00016033451317843344, + "loss": 0.7504, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001577458186692767, + "loss": 0.7445, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8725055432372506, + "eval_loss": 0.8712900280952454, + "eval_runtime": 6.8725, + "eval_samples_per_second": 524.993, + "eval_steps_per_second": 8.294, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00015473192525084978, + "loss": 0.7315, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8059677481651306, + "eval_runtime": 6.6847, + "eval_samples_per_second": 539.738, + "eval_steps_per_second": 8.527, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0001513100324421449, + "loss": 0.7399, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00014749966811033658, + "loss": 0.7305, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8077369332313538, + "eval_runtime": 6.8796, + "eval_samples_per_second": 524.448, + "eval_steps_per_second": 8.285, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00014332257703023718, + "loss": 0.7263, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8096019625663757, + "eval_runtime": 6.8551, + "eval_samples_per_second": 526.327, + "eval_steps_per_second": 8.315, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00013880259679242826, + "loss": 0.7235, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00013396552176822685, + "loss": 0.722, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8104827404022217, + "eval_runtime": 6.8186, + "eval_samples_per_second": 529.143, + "eval_steps_per_second": 8.36, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00012883895590780489, + "loss": 0.7206, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0001234521552115048, + "loss": 0.7185, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9101995565410199, + "eval_loss": 0.8184380531311035, + "eval_runtime": 6.6097, + "eval_samples_per_second": 545.867, + "eval_steps_per_second": 8.624, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00011783586077332726, + "loss": 0.7135, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8120481967926025, + "eval_runtime": 6.6863, + "eval_samples_per_second": 539.61, + "eval_steps_per_second": 8.525, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00011202212334936899, + "loss": 0.7155, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00010604412045235427, + "loss": 0.7077, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8206501603126526, + "eval_runtime": 6.851, + "eval_samples_per_second": 526.637, + "eval_steps_per_second": 8.32, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 9.993596701605437e-05, + "loss": 0.7104, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 9.373252071008465e-05, + "loss": 0.7052, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.8183068633079529, + "eval_runtime": 6.5752, + "eval_samples_per_second": 548.725, + "eval_steps_per_second": 8.669, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 8.746918301609826e-05, + "loss": 0.7042, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.815409779548645, + "eval_runtime": 6.4834, + "eval_samples_per_second": 556.502, + "eval_steps_per_second": 8.792, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 8.118169720058375e-05, + "loss": 0.6982, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 7.490594433718378e-05, + "loss": 0.7001, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8042070865631104, + "eval_runtime": 6.7928, + "eval_samples_per_second": 531.149, + "eval_steps_per_second": 8.391, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 6.867773854258376e-05, + "loss": 0.6961, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 6.25326225945067e-05, + "loss": 0.699, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8133599162101746, + "eval_runtime": 6.6735, + "eval_samples_per_second": 540.646, + "eval_steps_per_second": 8.541, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 5.650566509817019e-05, + "loss": 0.7041, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.8025786280632019, + "eval_runtime": 6.8045, + "eval_samples_per_second": 530.24, + "eval_steps_per_second": 8.377, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 5.063126035872462e-05, + "loss": 0.693, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 4.4942932101749223e-05, + "loss": 0.6948, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9243348115299335, + "eval_loss": 0.8015178442001343, + "eval_runtime": 6.9495, + "eval_samples_per_second": 519.177, + "eval_steps_per_second": 8.202, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 3.9473142161923346e-05, + "loss": 0.6907, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8122717142105103, + "eval_runtime": 6.7448, + "eval_samples_per_second": 534.929, + "eval_steps_per_second": 8.451, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 3.425310523163749e-05, + "loss": 0.6918, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 2.9312610726726258e-05, + "loss": 0.6918, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.8021332025527954, + "eval_runtime": 6.7996, + "eval_samples_per_second": 530.617, + "eval_steps_per_second": 8.383, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 2.4679852785890483e-05, + "loss": 0.6903, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 2.038126937395761e-05, + "loss": 0.6906, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.799936830997467, + "eval_runtime": 6.7174, + "eval_samples_per_second": 537.109, + "eval_steps_per_second": 8.485, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.6441391407177517e-05, + "loss": 0.686, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8048804998397827, + "eval_runtime": 6.7675, + "eval_samples_per_second": 533.135, + "eval_steps_per_second": 8.423, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.288270276155674e-05, + "loss": 0.6886, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 9.725511963128054e-06, + "loss": 0.6875, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.8049158453941345, + "eval_runtime": 6.8623, + "eval_samples_per_second": 525.77, + "eval_steps_per_second": 8.306, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 6.987836292385739e-06, + "loss": 0.6876, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 4.68529896427359e-06, + "loss": 0.6898, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9240576496674058, + "eval_loss": 0.8007503747940063, + "eval_runtime": 6.7232, + "eval_samples_per_second": 536.648, + "eval_steps_per_second": 8.478, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.8310399704924358e-06, + "loss": 0.6899, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9254434589800443, + "eval_loss": 0.7960415482521057, + "eval_runtime": 6.6537, + "eval_samples_per_second": 542.256, + "eval_steps_per_second": 8.567, + "step": 1190 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00016647535441267073, + "metric": "eval/loss", + "warmup_ratio": 0.14194784825804713 + } +} diff --git a/run-qulonkt5/checkpoint-1190/training_args.bin b/run-qulonkt5/checkpoint-1190/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e913d7e5e1ea02ffe4b46390109cfe976d6e75d3 --- /dev/null +++ b/run-qulonkt5/checkpoint-1190/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8227fb93308b289e3119ee0da8bf24b82b82b272f3374c98337d39f7bd9ed4db +size 4792 diff --git a/run-qulonkt5/checkpoint-1260/model.safetensors b/run-qulonkt5/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..828621b04d701d92a54fa41db3ff94b50e0393f5 --- /dev/null +++ b/run-qulonkt5/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4747056c1e6cf8096966ca0c03277e0cf8b51901dfe0dbfbb6c7724db56b012 +size 198025308 diff --git a/run-qulonkt5/checkpoint-1260/optimizer.pt b/run-qulonkt5/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b685b067812b45a1717c64256747937e8816ffb --- /dev/null +++ b/run-qulonkt5/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8814f8a8225f89aba083dfccde996855969d637fb1cea9b19298055a27cd9760 +size 395900602 diff --git a/run-qulonkt5/checkpoint-1260/rng_state.pth b/run-qulonkt5/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-qulonkt5/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-qulonkt5/checkpoint-1260/scheduler.pt b/run-qulonkt5/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea25de7c6f25f655ad392da43fb3291fb76e419d --- /dev/null +++ b/run-qulonkt5/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:416915f9a5cd9fcaebc41e1ce71235be97979f8e6b470f087aabf995270bd816 +size 1064 diff --git a/run-qulonkt5/checkpoint-1260/trainer_state.json b/run-qulonkt5/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..85213a0ecd0056ffef8fad81b06f6ac867de2d17 --- /dev/null +++ b/run-qulonkt5/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9254434589800443, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-qulonkt5/checkpoint-1190", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.4180777735918655e-05, + "loss": 1.3494, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8495011086474501, + "eval_loss": 0.9360548257827759, + "eval_runtime": 6.8004, + "eval_samples_per_second": 530.556, + "eval_steps_per_second": 8.382, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 4.836155547183731e-05, + "loss": 0.9447, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 7.254233320775595e-05, + "loss": 0.8484, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8134665489196777, + "eval_runtime": 6.7577, + "eval_samples_per_second": 533.913, + "eval_steps_per_second": 8.435, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 9.672311094367462e-05, + "loss": 0.809, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9007760532150776, + "eval_loss": 0.8324153423309326, + "eval_runtime": 6.902, + "eval_samples_per_second": 522.744, + "eval_steps_per_second": 8.258, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00012090388867959327, + "loss": 0.7933, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001450846664155119, + "loss": 0.783, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8505873680114746, + "eval_runtime": 6.737, + "eval_samples_per_second": 535.549, + "eval_steps_per_second": 8.461, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00016647219083812365, + "loss": 0.7768, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00016617990901627322, + "loss": 0.7737, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8140047788619995, + "eval_runtime": 6.9103, + "eval_samples_per_second": 522.12, + "eval_steps_per_second": 8.249, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0001654142970884026, + "loss": 0.7648, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9024390243902439, + "eval_loss": 0.8320362567901611, + "eval_runtime": 6.9755, + "eval_samples_per_second": 517.238, + "eval_steps_per_second": 8.171, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00016417972420599327, + "loss": 0.7551, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00016248323576076624, + "loss": 0.7513, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8033508062362671, + "eval_runtime": 6.5873, + "eval_samples_per_second": 547.723, + "eval_steps_per_second": 8.653, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00016033451317843344, + "loss": 0.7504, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001577458186692767, + "loss": 0.7445, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8725055432372506, + "eval_loss": 0.8712900280952454, + "eval_runtime": 6.8725, + "eval_samples_per_second": 524.993, + "eval_steps_per_second": 8.294, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00015473192525084978, + "loss": 0.7315, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8059677481651306, + "eval_runtime": 6.6847, + "eval_samples_per_second": 539.738, + "eval_steps_per_second": 8.527, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0001513100324421449, + "loss": 0.7399, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00014749966811033658, + "loss": 0.7305, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8077369332313538, + "eval_runtime": 6.8796, + "eval_samples_per_second": 524.448, + "eval_steps_per_second": 8.285, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00014332257703023718, + "loss": 0.7263, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8096019625663757, + "eval_runtime": 6.8551, + "eval_samples_per_second": 526.327, + "eval_steps_per_second": 8.315, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00013880259679242826, + "loss": 0.7235, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00013396552176822685, + "loss": 0.722, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8104827404022217, + "eval_runtime": 6.8186, + "eval_samples_per_second": 529.143, + "eval_steps_per_second": 8.36, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00012883895590780489, + "loss": 0.7206, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0001234521552115048, + "loss": 0.7185, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9101995565410199, + "eval_loss": 0.8184380531311035, + "eval_runtime": 6.6097, + "eval_samples_per_second": 545.867, + "eval_steps_per_second": 8.624, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00011783586077332726, + "loss": 0.7135, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8120481967926025, + "eval_runtime": 6.6863, + "eval_samples_per_second": 539.61, + "eval_steps_per_second": 8.525, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00011202212334936899, + "loss": 0.7155, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00010604412045235427, + "loss": 0.7077, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8206501603126526, + "eval_runtime": 6.851, + "eval_samples_per_second": 526.637, + "eval_steps_per_second": 8.32, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 9.993596701605437e-05, + "loss": 0.7104, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 9.373252071008465e-05, + "loss": 0.7052, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.8183068633079529, + "eval_runtime": 6.5752, + "eval_samples_per_second": 548.725, + "eval_steps_per_second": 8.669, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 8.746918301609826e-05, + "loss": 0.7042, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.815409779548645, + "eval_runtime": 6.4834, + "eval_samples_per_second": 556.502, + "eval_steps_per_second": 8.792, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 8.118169720058375e-05, + "loss": 0.6982, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 7.490594433718378e-05, + "loss": 0.7001, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8042070865631104, + "eval_runtime": 6.7928, + "eval_samples_per_second": 531.149, + "eval_steps_per_second": 8.391, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 6.867773854258376e-05, + "loss": 0.6961, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 6.25326225945067e-05, + "loss": 0.699, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8133599162101746, + "eval_runtime": 6.6735, + "eval_samples_per_second": 540.646, + "eval_steps_per_second": 8.541, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 5.650566509817019e-05, + "loss": 0.7041, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.8025786280632019, + "eval_runtime": 6.8045, + "eval_samples_per_second": 530.24, + "eval_steps_per_second": 8.377, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 5.063126035872462e-05, + "loss": 0.693, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 4.4942932101749223e-05, + "loss": 0.6948, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9243348115299335, + "eval_loss": 0.8015178442001343, + "eval_runtime": 6.9495, + "eval_samples_per_second": 519.177, + "eval_steps_per_second": 8.202, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 3.9473142161923346e-05, + "loss": 0.6907, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8122717142105103, + "eval_runtime": 6.7448, + "eval_samples_per_second": 534.929, + "eval_steps_per_second": 8.451, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 3.425310523163749e-05, + "loss": 0.6918, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 2.9312610726726258e-05, + "loss": 0.6918, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.8021332025527954, + "eval_runtime": 6.7996, + "eval_samples_per_second": 530.617, + "eval_steps_per_second": 8.383, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 2.4679852785890483e-05, + "loss": 0.6903, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 2.038126937395761e-05, + "loss": 0.6906, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.799936830997467, + "eval_runtime": 6.7174, + "eval_samples_per_second": 537.109, + "eval_steps_per_second": 8.485, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.6441391407177517e-05, + "loss": 0.686, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8048804998397827, + "eval_runtime": 6.7675, + "eval_samples_per_second": 533.135, + "eval_steps_per_second": 8.423, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.288270276155674e-05, + "loss": 0.6886, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 9.725511963128054e-06, + "loss": 0.6875, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.8049158453941345, + "eval_runtime": 6.8623, + "eval_samples_per_second": 525.77, + "eval_steps_per_second": 8.306, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 6.987836292385739e-06, + "loss": 0.6876, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 4.68529896427359e-06, + "loss": 0.6898, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9240576496674058, + "eval_loss": 0.8007503747940063, + "eval_runtime": 6.7232, + "eval_samples_per_second": 536.648, + "eval_steps_per_second": 8.478, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.8310399704924358e-06, + "loss": 0.6899, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9254434589800443, + "eval_loss": 0.7960415482521057, + "eval_runtime": 6.6537, + "eval_samples_per_second": 542.256, + "eval_steps_per_second": 8.567, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.4356410929278291e-06, + "loss": 0.6834, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 5.070655161269742e-07, + "loss": 0.6856, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8039860129356384, + "eval_runtime": 7.1261, + "eval_samples_per_second": 506.306, + "eval_steps_per_second": 7.999, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 5.0612383440887025e-08, + "loss": 0.6877, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8024393320083618, + "eval_runtime": 7.146, + "eval_samples_per_second": 504.901, + "eval_steps_per_second": 7.977, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00016647535441267073, + "metric": "eval/loss", + "warmup_ratio": 0.14194784825804713 + } +} diff --git a/run-qulonkt5/checkpoint-1260/training_args.bin b/run-qulonkt5/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e913d7e5e1ea02ffe4b46390109cfe976d6e75d3 --- /dev/null +++ b/run-qulonkt5/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8227fb93308b289e3119ee0da8bf24b82b82b272f3374c98337d39f7bd9ed4db +size 4792 diff --git a/run-qz988uch/checkpoint-616/model.safetensors b/run-qz988uch/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..86af12870617ce2788eb3a31be7d26706d791685 --- /dev/null +++ b/run-qz988uch/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35595f951f65e5c989df0f9e61f4b88d1cb40faa1fbfb5e4c3663237eabc3434 +size 198025308 diff --git a/run-qz988uch/checkpoint-616/optimizer.pt b/run-qz988uch/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c83f933f4711800abefbbce4e3360c44e4093f6 --- /dev/null +++ b/run-qz988uch/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf41a4a74c5f40060a3ef36c03c9b8a04c27f7efee3747d4a7b88ee854d3f0fa +size 395900602 diff --git a/run-qz988uch/checkpoint-616/rng_state.pth b/run-qz988uch/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-qz988uch/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-qz988uch/checkpoint-616/scheduler.pt b/run-qz988uch/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d71e46d652989ecd103e0dddc84151b8b6d7469f --- /dev/null +++ b/run-qz988uch/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30c1f23654124c87ea677c50cf6c0a6e2520aa65fa54ba64986d2882a085eae6 +size 1064 diff --git a/run-qz988uch/checkpoint-616/trainer_state.json b/run-qz988uch/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..72562ff8f697e7574d6db044ca1c82e76be8eeed --- /dev/null +++ b/run-qz988uch/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9182401356334213, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-qz988uch/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 8.085405288993838e-06, + "loss": 1.5024, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8201219512195121, + "eval_f1": 0.749711905059865, + "eval_loss": 1.238232970237732, + "eval_precision": 0.7019971927010356, + "eval_recall": 0.8201219512195121, + "eval_runtime": 8.2502, + "eval_samples_per_second": 437.325, + "eval_steps_per_second": 3.515, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 1.6170810577987675e-05, + "loss": 1.2943, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 2.4256215866981513e-05, + "loss": 1.0103, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8295454545454546, + "eval_f1": 0.7533478141085029, + "eval_loss": 0.9392069578170776, + "eval_precision": 0.7704949815553549, + "eval_recall": 0.8295454545454546, + "eval_runtime": 8.0928, + "eval_samples_per_second": 445.827, + "eval_steps_per_second": 3.583, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 3.234162115597535e-05, + "loss": 0.9099, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8871951219512195, + "eval_f1": 0.8740437235982524, + "eval_loss": 0.8960732221603394, + "eval_precision": 0.881918455359181, + "eval_recall": 0.8871951219512195, + "eval_runtime": 8.2795, + "eval_samples_per_second": 435.775, + "eval_steps_per_second": 3.503, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 4.042702644496919e-05, + "loss": 0.8718, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 4.8512431733963025e-05, + "loss": 0.8292, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.8978827477355693, + "eval_loss": 0.8212575316429138, + "eval_precision": 0.8947130720002221, + "eval_recall": 0.9068736141906873, + "eval_runtime": 8.1119, + "eval_samples_per_second": 444.78, + "eval_steps_per_second": 3.575, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 5.659783702295686e-05, + "loss": 0.815, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 6.46832423119507e-05, + "loss": 0.7936, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8963414634146342, + "eval_f1": 0.896125410691494, + "eval_loss": 0.8351534008979797, + "eval_precision": 0.8980312892484139, + "eval_recall": 0.8963414634146342, + "eval_runtime": 8.2501, + "eval_samples_per_second": 437.33, + "eval_steps_per_second": 3.515, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 7.276864760094454e-05, + "loss": 0.7773, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9053681108806009, + "eval_loss": 0.8025357127189636, + "eval_precision": 0.9038624731249939, + "eval_recall": 0.9138026607538803, + "eval_runtime": 8.0981, + "eval_samples_per_second": 445.537, + "eval_steps_per_second": 3.581, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 7.83540570596207e-05, + "loss": 0.7799, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 7.814645035314094e-05, + "loss": 0.7723, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9089610292390105, + "eval_loss": 0.8009293079376221, + "eval_precision": 0.9066326284837471, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.8595, + "eval_samples_per_second": 459.064, + "eval_steps_per_second": 3.69, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 7.768313594254801e-05, + "loss": 0.768, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 7.696715445750253e-05, + "loss": 0.761, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9096452328159645, + "eval_f1": 0.9041089056239187, + "eval_loss": 0.807222843170166, + "eval_precision": 0.9011303077254808, + "eval_recall": 0.9096452328159645, + "eval_runtime": 8.4027, + "eval_samples_per_second": 429.384, + "eval_steps_per_second": 3.451, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 7.600320472560432e-05, + "loss": 0.7584, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9008834400443207, + "eval_loss": 0.8148975372314453, + "eval_precision": 0.902375824684657, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.8084, + "eval_samples_per_second": 462.064, + "eval_steps_per_second": 3.714, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 7.479761293502959e-05, + "loss": 0.752, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 7.335829111716181e-05, + "loss": 0.7472, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.9041229546330667, + "eval_loss": 0.8129636645317078, + "eval_precision": 0.9020925500466057, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.3846, + "eval_samples_per_second": 430.312, + "eval_steps_per_second": 3.459, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 7.169468522168553e-05, + "loss": 0.7417, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.9054215279563246, + "eval_loss": 0.8212863802909851, + "eval_precision": 0.9060158876229262, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.2089, + "eval_samples_per_second": 439.522, + "eval_steps_per_second": 3.533, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 6.981771312491482e-05, + "loss": 0.7393, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 6.773969297819379e-05, + "loss": 0.7336, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9090804744619976, + "eval_loss": 0.800468921661377, + "eval_precision": 0.9064542387511471, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.9123, + "eval_samples_per_second": 455.998, + "eval_steps_per_second": 3.665, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 6.547426236660299e-05, + "loss": 0.7366, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 6.303628880851483e-05, + "loss": 0.7269, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9085365853658537, + "eval_f1": 0.9080880011551009, + "eval_loss": 0.8160756230354309, + "eval_precision": 0.9091475262884593, + "eval_recall": 0.9085365853658537, + "eval_runtime": 8.2359, + "eval_samples_per_second": 438.082, + "eval_steps_per_second": 3.521, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 6.04417721833698e-05, + "loss": 0.7343, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9077139651729391, + "eval_loss": 0.8051671385765076, + "eval_precision": 0.9042483151544571, + "eval_recall": 0.914079822616408, + "eval_runtime": 8.3381, + "eval_samples_per_second": 432.713, + "eval_steps_per_second": 3.478, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 5.770773972801836e-05, + "loss": 0.7219, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 5.4852134290744055e-05, + "loss": 0.7238, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.908656515327096, + "eval_loss": 0.7985002994537354, + "eval_precision": 0.9069937146292176, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.1197, + "eval_samples_per_second": 444.349, + "eval_steps_per_second": 3.572, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 5.189369657633202e-05, + "loss": 0.7222, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 4.885184215498226e-05, + "loss": 0.7211, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9118043053755276, + "eval_loss": 0.8112980127334595, + "eval_precision": 0.9137097066661989, + "eval_recall": 0.9210088691796009, + "eval_runtime": 8.3319, + "eval_samples_per_second": 433.033, + "eval_steps_per_second": 3.481, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 4.574653404223109e-05, + "loss": 0.7176, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8949556541019955, + "eval_f1": 0.8981934585436359, + "eval_loss": 0.8334438800811768, + "eval_precision": 0.905331625230285, + "eval_recall": 0.8949556541019955, + "eval_runtime": 8.1955, + "eval_samples_per_second": 440.242, + "eval_steps_per_second": 3.539, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 4.259815168611017e-05, + "loss": 0.7164, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 3.942735722135162e-05, + "loss": 0.7142, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9136182136624843, + "eval_loss": 0.805989682674408, + "eval_precision": 0.911419630429825, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.2561, + "eval_samples_per_second": 437.009, + "eval_steps_per_second": 3.513, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 3.6254959868383e-05, + "loss": 0.7152, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 3.3101779367031456e-05, + "loss": 0.7095, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9112486175871788, + "eval_loss": 0.8037068843841553, + "eval_precision": 0.9116618353975676, + "eval_recall": 0.9157427937915743, + "eval_runtime": 8.096, + "eval_samples_per_second": 445.653, + "eval_steps_per_second": 3.582, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 2.9988509341191512e-05, + "loss": 0.7139, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9090312949559141, + "eval_loss": 0.8076122403144836, + "eval_precision": 0.9077325865058469, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.0646, + "eval_samples_per_second": 447.386, + "eval_steps_per_second": 3.596, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 2.693558149116355e-05, + "loss": 0.7075, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 2.3963031504938934e-05, + "loss": 0.7039, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9106544633502627, + "eval_loss": 0.8057740330696106, + "eval_precision": 0.90831405839241, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.72, + "eval_samples_per_second": 467.356, + "eval_steps_per_second": 3.756, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 2.1090367568425905e-05, + "loss": 0.7085, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9111172577095981, + "eval_loss": 0.8054983019828796, + "eval_precision": 0.909930572772933, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.5555, + "eval_samples_per_second": 421.717, + "eval_steps_per_second": 3.39, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 1.8336442337554523e-05, + "loss": 0.7058, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 1.57193292124796e-05, + "loss": 0.704, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9111130521102017, + "eval_loss": 0.8064684867858887, + "eval_precision": 0.9095999977150472, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.5384, + "eval_samples_per_second": 422.56, + "eval_steps_per_second": 3.396, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.3256203725866163e-05, + "loss": 0.7048, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.0963230823680033e-05, + "loss": 0.7036, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9180278111340481, + "eval_loss": 0.7965641021728516, + "eval_precision": 0.9164621430941197, + "eval_recall": 0.9210088691796009, + "eval_runtime": 8.5667, + "eval_samples_per_second": 421.165, + "eval_steps_per_second": 3.385, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 8.855458778234772e-06, + "loss": 0.7023, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9138698060860845, + "eval_loss": 0.8049460053443909, + "eval_precision": 0.9133453310746181, + "eval_recall": 0.9196230598669624, + "eval_runtime": 8.1975, + "eval_samples_per_second": 440.133, + "eval_steps_per_second": 3.538, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 6.946720429719858e-06, + "loss": 0.7033, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 5.24954240434024e-06, + "loss": 0.6996, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9159791246804138, + "eval_loss": 0.79762864112854, + "eval_precision": 0.9154619757757954, + "eval_recall": 0.9210088691796009, + "eval_runtime": 7.937, + "eval_samples_per_second": 454.579, + "eval_steps_per_second": 3.654, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 3.7750629048484066e-06, + "loss": 0.7, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 2.532958612991398e-06, + "loss": 0.6973, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9160555463777809, + "eval_loss": 0.802642285823822, + "eval_precision": 0.9140200001689326, + "eval_recall": 0.9196230598669624, + "eval_runtime": 7.8782, + "eval_samples_per_second": 457.972, + "eval_steps_per_second": 3.681, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.5313811835959666e-06, + "loss": 0.7019, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9139763003067741, + "eval_loss": 0.803246021270752, + "eval_precision": 0.9129349370238264, + "eval_recall": 0.9185144124168514, + "eval_runtime": 8.1889, + "eval_samples_per_second": 440.597, + "eval_steps_per_second": 3.541, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 7.769037470668707e-07, + "loss": 0.7018, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 2.744777713907088e-07, + "loss": 0.7025, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9232261640798226, + "eval_f1": 0.9182401356334213, + "eval_loss": 0.7972019910812378, + "eval_precision": 0.9150914553528028, + "eval_recall": 0.9232261640798226, + "eval_runtime": 8.2025, + "eval_samples_per_second": 439.866, + "eval_steps_per_second": 3.536, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.020871199296353588, + "learning_rate": 7.836623587794027e-05, + "metric": "eval/loss", + "weight_decay": 0.011144668170968723 + } +} diff --git a/run-qz988uch/checkpoint-616/training_args.bin b/run-qz988uch/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e10627af6f463b842fdbeee79f083956d2dc1624 --- /dev/null +++ b/run-qz988uch/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82697f56d621790309f3719e38f39541ada3d586bec1752f44fd875b2679e433 +size 4792 diff --git a/run-qz988uch/checkpoint-630/model.safetensors b/run-qz988uch/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f07f0158a6a200bd48ba53f913df4179108865aa --- /dev/null +++ b/run-qz988uch/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b6cde41a15fe40895b492c9f0280539abd64776cdc2ec880d0eaa4d9ed9f068 +size 198025308 diff --git a/run-qz988uch/checkpoint-630/optimizer.pt b/run-qz988uch/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..627e70e10b4cd48c4065c942c28039e08ceab1e6 --- /dev/null +++ b/run-qz988uch/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23ac141cb1ba18a3606074053419de0552b547a70e54589e6708ae7a5d4a47cd +size 395900602 diff --git a/run-qz988uch/checkpoint-630/rng_state.pth b/run-qz988uch/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-qz988uch/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-qz988uch/checkpoint-630/scheduler.pt b/run-qz988uch/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9972661c9ca56d91cb83d752995189471b95d56 --- /dev/null +++ b/run-qz988uch/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:124eb9ff43b244ea55483f2fc5a084495661fca5215cc5b12fe6e5900421558b +size 1064 diff --git a/run-qz988uch/checkpoint-630/trainer_state.json b/run-qz988uch/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d2c8d9f75a59469780a0bc5254ad08b7a6225e63 --- /dev/null +++ b/run-qz988uch/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9182401356334213, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-qz988uch/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 8.085405288993838e-06, + "loss": 1.5024, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8201219512195121, + "eval_f1": 0.749711905059865, + "eval_loss": 1.238232970237732, + "eval_precision": 0.7019971927010356, + "eval_recall": 0.8201219512195121, + "eval_runtime": 8.2502, + "eval_samples_per_second": 437.325, + "eval_steps_per_second": 3.515, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 1.6170810577987675e-05, + "loss": 1.2943, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 2.4256215866981513e-05, + "loss": 1.0103, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8295454545454546, + "eval_f1": 0.7533478141085029, + "eval_loss": 0.9392069578170776, + "eval_precision": 0.7704949815553549, + "eval_recall": 0.8295454545454546, + "eval_runtime": 8.0928, + "eval_samples_per_second": 445.827, + "eval_steps_per_second": 3.583, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 3.234162115597535e-05, + "loss": 0.9099, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8871951219512195, + "eval_f1": 0.8740437235982524, + "eval_loss": 0.8960732221603394, + "eval_precision": 0.881918455359181, + "eval_recall": 0.8871951219512195, + "eval_runtime": 8.2795, + "eval_samples_per_second": 435.775, + "eval_steps_per_second": 3.503, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 4.042702644496919e-05, + "loss": 0.8718, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 4.8512431733963025e-05, + "loss": 0.8292, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.8978827477355693, + "eval_loss": 0.8212575316429138, + "eval_precision": 0.8947130720002221, + "eval_recall": 0.9068736141906873, + "eval_runtime": 8.1119, + "eval_samples_per_second": 444.78, + "eval_steps_per_second": 3.575, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 5.659783702295686e-05, + "loss": 0.815, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 6.46832423119507e-05, + "loss": 0.7936, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8963414634146342, + "eval_f1": 0.896125410691494, + "eval_loss": 0.8351534008979797, + "eval_precision": 0.8980312892484139, + "eval_recall": 0.8963414634146342, + "eval_runtime": 8.2501, + "eval_samples_per_second": 437.33, + "eval_steps_per_second": 3.515, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 7.276864760094454e-05, + "loss": 0.7773, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9053681108806009, + "eval_loss": 0.8025357127189636, + "eval_precision": 0.9038624731249939, + "eval_recall": 0.9138026607538803, + "eval_runtime": 8.0981, + "eval_samples_per_second": 445.537, + "eval_steps_per_second": 3.581, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 7.83540570596207e-05, + "loss": 0.7799, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 7.814645035314094e-05, + "loss": 0.7723, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9089610292390105, + "eval_loss": 0.8009293079376221, + "eval_precision": 0.9066326284837471, + "eval_recall": 0.916019955654102, + "eval_runtime": 7.8595, + "eval_samples_per_second": 459.064, + "eval_steps_per_second": 3.69, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 7.768313594254801e-05, + "loss": 0.768, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 7.696715445750253e-05, + "loss": 0.761, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9096452328159645, + "eval_f1": 0.9041089056239187, + "eval_loss": 0.807222843170166, + "eval_precision": 0.9011303077254808, + "eval_recall": 0.9096452328159645, + "eval_runtime": 8.4027, + "eval_samples_per_second": 429.384, + "eval_steps_per_second": 3.451, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 7.600320472560432e-05, + "loss": 0.7584, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9124168514412417, + "eval_f1": 0.9008834400443207, + "eval_loss": 0.8148975372314453, + "eval_precision": 0.902375824684657, + "eval_recall": 0.9124168514412417, + "eval_runtime": 7.8084, + "eval_samples_per_second": 462.064, + "eval_steps_per_second": 3.714, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 7.479761293502959e-05, + "loss": 0.752, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 7.335829111716181e-05, + "loss": 0.7472, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.9041229546330667, + "eval_loss": 0.8129636645317078, + "eval_precision": 0.9020925500466057, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.3846, + "eval_samples_per_second": 430.312, + "eval_steps_per_second": 3.459, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 7.169468522168553e-05, + "loss": 0.7417, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.9054215279563246, + "eval_loss": 0.8212863802909851, + "eval_precision": 0.9060158876229262, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.2089, + "eval_samples_per_second": 439.522, + "eval_steps_per_second": 3.533, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 6.981771312491482e-05, + "loss": 0.7393, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 6.773969297819379e-05, + "loss": 0.7336, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9090804744619976, + "eval_loss": 0.800468921661377, + "eval_precision": 0.9064542387511471, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.9123, + "eval_samples_per_second": 455.998, + "eval_steps_per_second": 3.665, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 6.547426236660299e-05, + "loss": 0.7366, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 6.303628880851483e-05, + "loss": 0.7269, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9085365853658537, + "eval_f1": 0.9080880011551009, + "eval_loss": 0.8160756230354309, + "eval_precision": 0.9091475262884593, + "eval_recall": 0.9085365853658537, + "eval_runtime": 8.2359, + "eval_samples_per_second": 438.082, + "eval_steps_per_second": 3.521, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 6.04417721833698e-05, + "loss": 0.7343, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9077139651729391, + "eval_loss": 0.8051671385765076, + "eval_precision": 0.9042483151544571, + "eval_recall": 0.914079822616408, + "eval_runtime": 8.3381, + "eval_samples_per_second": 432.713, + "eval_steps_per_second": 3.478, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 5.770773972801836e-05, + "loss": 0.7219, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 5.4852134290744055e-05, + "loss": 0.7238, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.908656515327096, + "eval_loss": 0.7985002994537354, + "eval_precision": 0.9069937146292176, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.1197, + "eval_samples_per_second": 444.349, + "eval_steps_per_second": 3.572, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 5.189369657633202e-05, + "loss": 0.7222, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 4.885184215498226e-05, + "loss": 0.7211, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9118043053755276, + "eval_loss": 0.8112980127334595, + "eval_precision": 0.9137097066661989, + "eval_recall": 0.9210088691796009, + "eval_runtime": 8.3319, + "eval_samples_per_second": 433.033, + "eval_steps_per_second": 3.481, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 4.574653404223109e-05, + "loss": 0.7176, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8949556541019955, + "eval_f1": 0.8981934585436359, + "eval_loss": 0.8334438800811768, + "eval_precision": 0.905331625230285, + "eval_recall": 0.8949556541019955, + "eval_runtime": 8.1955, + "eval_samples_per_second": 440.242, + "eval_steps_per_second": 3.539, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 4.259815168611017e-05, + "loss": 0.7164, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 3.942735722135162e-05, + "loss": 0.7142, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9136182136624843, + "eval_loss": 0.805989682674408, + "eval_precision": 0.911419630429825, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.2561, + "eval_samples_per_second": 437.009, + "eval_steps_per_second": 3.513, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 3.6254959868383e-05, + "loss": 0.7152, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 3.3101779367031456e-05, + "loss": 0.7095, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9112486175871788, + "eval_loss": 0.8037068843841553, + "eval_precision": 0.9116618353975676, + "eval_recall": 0.9157427937915743, + "eval_runtime": 8.096, + "eval_samples_per_second": 445.653, + "eval_steps_per_second": 3.582, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 2.9988509341191512e-05, + "loss": 0.7139, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9090312949559141, + "eval_loss": 0.8076122403144836, + "eval_precision": 0.9077325865058469, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.0646, + "eval_samples_per_second": 447.386, + "eval_steps_per_second": 3.596, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 2.693558149116355e-05, + "loss": 0.7075, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 2.3963031504938934e-05, + "loss": 0.7039, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9106544633502627, + "eval_loss": 0.8057740330696106, + "eval_precision": 0.90831405839241, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.72, + "eval_samples_per_second": 467.356, + "eval_steps_per_second": 3.756, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 2.1090367568425905e-05, + "loss": 0.7085, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9111172577095981, + "eval_loss": 0.8054983019828796, + "eval_precision": 0.909930572772933, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.5555, + "eval_samples_per_second": 421.717, + "eval_steps_per_second": 3.39, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 1.8336442337554523e-05, + "loss": 0.7058, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 1.57193292124796e-05, + "loss": 0.704, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9111130521102017, + "eval_loss": 0.8064684867858887, + "eval_precision": 0.9095999977150472, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.5384, + "eval_samples_per_second": 422.56, + "eval_steps_per_second": 3.396, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 1.3256203725866163e-05, + "loss": 0.7048, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 1.0963230823680033e-05, + "loss": 0.7036, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9180278111340481, + "eval_loss": 0.7965641021728516, + "eval_precision": 0.9164621430941197, + "eval_recall": 0.9210088691796009, + "eval_runtime": 8.5667, + "eval_samples_per_second": 421.165, + "eval_steps_per_second": 3.385, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 8.855458778234772e-06, + "loss": 0.7023, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9138698060860845, + "eval_loss": 0.8049460053443909, + "eval_precision": 0.9133453310746181, + "eval_recall": 0.9196230598669624, + "eval_runtime": 8.1975, + "eval_samples_per_second": 440.133, + "eval_steps_per_second": 3.538, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 6.946720429719858e-06, + "loss": 0.7033, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 5.24954240434024e-06, + "loss": 0.6996, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9159791246804138, + "eval_loss": 0.79762864112854, + "eval_precision": 0.9154619757757954, + "eval_recall": 0.9210088691796009, + "eval_runtime": 7.937, + "eval_samples_per_second": 454.579, + "eval_steps_per_second": 3.654, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 3.7750629048484066e-06, + "loss": 0.7, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 2.532958612991398e-06, + "loss": 0.6973, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9160555463777809, + "eval_loss": 0.802642285823822, + "eval_precision": 0.9140200001689326, + "eval_recall": 0.9196230598669624, + "eval_runtime": 7.8782, + "eval_samples_per_second": 457.972, + "eval_steps_per_second": 3.681, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.5313811835959666e-06, + "loss": 0.7019, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9139763003067741, + "eval_loss": 0.803246021270752, + "eval_precision": 0.9129349370238264, + "eval_recall": 0.9185144124168514, + "eval_runtime": 8.1889, + "eval_samples_per_second": 440.597, + "eval_steps_per_second": 3.541, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 7.769037470668707e-07, + "loss": 0.7018, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 2.744777713907088e-07, + "loss": 0.7025, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9232261640798226, + "eval_f1": 0.9182401356334213, + "eval_loss": 0.7972019910812378, + "eval_precision": 0.9150914553528028, + "eval_recall": 0.9232261640798226, + "eval_runtime": 8.2025, + "eval_samples_per_second": 439.866, + "eval_steps_per_second": 3.536, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 2.7400566750334408e-08, + "loss": 0.7025, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9196230598669624, + "eval_f1": 0.9151619197392599, + "eval_loss": 0.8054646253585815, + "eval_precision": 0.9138987669596383, + "eval_recall": 0.9196230598669624, + "eval_runtime": 8.3649, + "eval_samples_per_second": 431.328, + "eval_steps_per_second": 3.467, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.020871199296353588, + "learning_rate": 7.836623587794027e-05, + "metric": "eval/loss", + "weight_decay": 0.011144668170968723 + } +} diff --git a/run-qz988uch/checkpoint-630/training_args.bin b/run-qz988uch/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e10627af6f463b842fdbeee79f083956d2dc1624 --- /dev/null +++ b/run-qz988uch/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82697f56d621790309f3719e38f39541ada3d586bec1752f44fd875b2679e433 +size 4792 diff --git a/run-r5dfh0r8/checkpoint-1232/model.safetensors b/run-r5dfh0r8/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e171e187b1294f413ce5d616f653ca068c02ea4e --- /dev/null +++ b/run-r5dfh0r8/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38f360d5cae73d8ebd7d54c84db5bad7165ceda4498d716f0f16bce10169a176 +size 198025308 diff --git a/run-r5dfh0r8/checkpoint-1232/optimizer.pt b/run-r5dfh0r8/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5686700130b41006cd7c753fa83fdcca7b6c1c7 --- /dev/null +++ b/run-r5dfh0r8/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5252485454faf90e02d47284a04360d909ce5a56424cbea6bdbc1e87e011fab5 +size 395900602 diff --git a/run-r5dfh0r8/checkpoint-1232/rng_state.pth b/run-r5dfh0r8/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-r5dfh0r8/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-r5dfh0r8/checkpoint-1232/scheduler.pt b/run-r5dfh0r8/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a80e59ead7a6e0fab94666d3dddb5745a950382a --- /dev/null +++ b/run-r5dfh0r8/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7985439720a8aa6638e9e484a3cf1ddfe3560914ea1fc46f4bac0c1f38f3e78 +size 1064 diff --git a/run-r5dfh0r8/checkpoint-1232/trainer_state.json b/run-r5dfh0r8/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b0126ffe66d7bcf0b8a46ad51cad7a823edfec77 --- /dev/null +++ b/run-r5dfh0r8/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9201773835920177, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-r5dfh0r8/checkpoint-1232", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 5.1317400832005625e-05, + "loss": 1.2464, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.886640798226164, + "eval_loss": 0.8996214866638184, + "eval_runtime": 6.8338, + "eval_samples_per_second": 527.963, + "eval_steps_per_second": 8.341, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00010263480166401125, + "loss": 0.882, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00015395220249601687, + "loss": 0.8135, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9035476718403548, + "eval_loss": 0.8239731192588806, + "eval_runtime": 7.3316, + "eval_samples_per_second": 492.115, + "eval_steps_per_second": 7.775, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0002052696033280225, + "loss": 0.8045, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8991130820399114, + "eval_loss": 0.8360968828201294, + "eval_runtime": 7.1131, + "eval_samples_per_second": 507.234, + "eval_steps_per_second": 8.013, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00025658700416002813, + "loss": 0.7946, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00030790440499203374, + "loss": 0.7879, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6962305986696231, + "eval_loss": 1.1449085474014282, + "eval_runtime": 6.9491, + "eval_samples_per_second": 519.205, + "eval_steps_per_second": 8.203, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00035922180582403934, + "loss": 0.7875, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.000410539206656045, + "loss": 0.7828, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.7009423503325942, + "eval_loss": 1.1168012619018555, + "eval_runtime": 6.9876, + "eval_samples_per_second": 516.34, + "eval_steps_per_second": 8.157, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0004559258068639709, + "loss": 0.7906, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8605875831485588, + "eval_loss": 0.9142499566078186, + "eval_runtime": 7.101, + "eval_samples_per_second": 508.097, + "eval_steps_per_second": 8.027, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0004550424242390144, + "loss": 0.7896, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0004527289743641841, + "loss": 0.7901, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.871119733924612, + "eval_loss": 0.8741796016693115, + "eval_runtime": 6.8366, + "eval_samples_per_second": 527.746, + "eval_steps_per_second": 8.337, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0004490000268428782, + "loss": 0.7888, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0004438790657743496, + "loss": 0.7717, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.878880266075388, + "eval_loss": 0.8548110723495483, + "eval_runtime": 7.0878, + "eval_samples_per_second": 509.042, + "eval_steps_per_second": 8.042, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00043739834185596474, + "loss": 0.7658, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8960643015521065, + "eval_loss": 0.8551378846168518, + "eval_runtime": 6.8986, + "eval_samples_per_second": 523.005, + "eval_steps_per_second": 8.263, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00042959866927533605, + "loss": 0.7742, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0004205291686714583, + "loss": 0.7601, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.893569844789357, + "eval_loss": 0.8507728576660156, + "eval_runtime": 6.7763, + "eval_samples_per_second": 532.44, + "eval_steps_per_second": 8.412, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0004102469577836241, + "loss": 0.7563, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8310302495956421, + "eval_runtime": 7.3272, + "eval_samples_per_second": 492.41, + "eval_steps_per_second": 7.779, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00039881679173634285, + "loss": 0.7512, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0003863106552256715, + "loss": 0.7422, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8894124168514412, + "eval_loss": 0.8518989086151123, + "eval_runtime": 6.7334, + "eval_samples_per_second": 535.837, + "eval_steps_per_second": 8.465, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00037280730917527865, + "loss": 0.7523, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0003583917947173025, + "loss": 0.738, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8938470066518847, + "eval_loss": 0.8551179766654968, + "eval_runtime": 6.8262, + "eval_samples_per_second": 528.555, + "eval_steps_per_second": 8.35, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00034315489762182466, + "loss": 0.7372, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8799889135254989, + "eval_loss": 0.8525415658950806, + "eval_runtime": 6.7253, + "eval_samples_per_second": 536.479, + "eval_steps_per_second": 8.475, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00032719257654786486, + "loss": 0.7441, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003106053587166481, + "loss": 0.726, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9007760532150776, + "eval_loss": 0.826567530632019, + "eval_runtime": 6.5233, + "eval_samples_per_second": 553.094, + "eval_steps_per_second": 8.738, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00029349770681306217, + "loss": 0.7308, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00027597736110242173, + "loss": 0.7263, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8772172949002217, + "eval_loss": 0.9000593423843384, + "eval_runtime": 6.7912, + "eval_samples_per_second": 531.278, + "eval_steps_per_second": 8.393, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00025815466090574633, + "loss": 0.7302, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9104767184035477, + "eval_loss": 0.8178864121437073, + "eval_runtime": 6.7525, + "eval_samples_per_second": 534.318, + "eval_steps_per_second": 8.441, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0002401418497067496, + "loss": 0.7236, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00022205236826682793, + "loss": 0.7158, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8212663531303406, + "eval_runtime": 7.0152, + "eval_samples_per_second": 514.315, + "eval_steps_per_second": 8.125, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00020400014019985495, + "loss": 0.7085, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00018609885450607592, + "loss": 0.7132, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9079822616407982, + "eval_loss": 0.8249314427375793, + "eval_runtime": 6.903, + "eval_samples_per_second": 522.672, + "eval_steps_per_second": 8.257, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00016846124958354788, + "loss": 0.7123, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.824931263923645, + "eval_runtime": 6.7493, + "eval_samples_per_second": 534.575, + "eval_steps_per_second": 8.445, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0001511984032262651, + "loss": 0.707, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.0001344190330804078, + "loss": 0.7026, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8245134353637695, + "eval_runtime": 6.9369, + "eval_samples_per_second": 520.114, + "eval_steps_per_second": 8.217, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00011822881196428688, + "loss": 0.6974, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9118625277161863, + "eval_loss": 0.8201560974121094, + "eval_runtime": 6.5274, + "eval_samples_per_second": 552.748, + "eval_steps_per_second": 8.732, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.00010272970236395156, + "loss": 0.7008, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 8.801931429566253e-05, + "loss": 0.6996, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8126341104507446, + "eval_runtime": 6.8301, + "eval_samples_per_second": 528.253, + "eval_steps_per_second": 8.345, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 7.419029057926998e-05, + "loss": 0.6982, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 6.132972339391367e-05, + "loss": 0.6936, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8118070363998413, + "eval_runtime": 6.7715, + "eval_samples_per_second": 532.818, + "eval_steps_per_second": 8.418, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 4.951860579044968e-05, + "loss": 0.6903, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8067470192909241, + "eval_runtime": 6.9673, + "eval_samples_per_second": 517.845, + "eval_steps_per_second": 8.181, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 3.883132161485732e-05, + "loss": 0.6938, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.9335177054979378e-05, + "loss": 0.6901, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8188725113868713, + "eval_runtime": 6.8809, + "eval_samples_per_second": 524.347, + "eval_steps_per_second": 8.284, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 2.108997676081227e-05, + "loss": 0.6905, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.41476472078462e-05, + "loss": 0.6946, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8086256980895996, + "eval_runtime": 6.6543, + "eval_samples_per_second": 542.207, + "eval_steps_per_second": 8.566, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 8.551909675433608e-06, + "loss": 0.6919, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8124069571495056, + "eval_runtime": 7.0393, + "eval_samples_per_second": 512.549, + "eval_steps_per_second": 8.097, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 4.338004899695059e-06, + "loss": 0.6849, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.5324711350390228e-06, + "loss": 0.6874, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.8066354990005493, + "eval_runtime": 6.9928, + "eval_samples_per_second": 515.959, + "eval_steps_per_second": 8.151, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0004559353689305115, + "metric": "eval/loss", + "warmup_ratio": 0.1828231281549339 + } +} diff --git a/run-r5dfh0r8/checkpoint-1232/training_args.bin b/run-r5dfh0r8/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b6f84fb65ee9c32f563ed2a276f97425c42632f1 --- /dev/null +++ b/run-r5dfh0r8/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd31ed613341ad2e41e786fc23621baa4e5f8d6de127baf1072b4ec472e9f9d0 +size 4792 diff --git a/run-r5dfh0r8/checkpoint-1260/model.safetensors b/run-r5dfh0r8/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f57ada833c0c6399b7fb5d5df7ca70a7c71d3df --- /dev/null +++ b/run-r5dfh0r8/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91f65fcdfbc72f7f655482fa7578528c0394db45c11a5dd76f4044594cff2b2b +size 198025308 diff --git a/run-r5dfh0r8/checkpoint-1260/optimizer.pt b/run-r5dfh0r8/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..17de5d5fb9ee0f75bb12d69f1cf3d80693e65c43 --- /dev/null +++ b/run-r5dfh0r8/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4dd770addfa29fa0f3a7034828e8cccda407788ad8f5bc26fbd043c724e0319 +size 395900602 diff --git a/run-r5dfh0r8/checkpoint-1260/rng_state.pth b/run-r5dfh0r8/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-r5dfh0r8/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-r5dfh0r8/checkpoint-1260/scheduler.pt b/run-r5dfh0r8/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a9f56290f12bdb9c9c92785c66c69fb60c4d79e --- /dev/null +++ b/run-r5dfh0r8/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8267396b7714bf1fc37f98dfab3cdbb04ac5387591792ec5fe0e96ffe6ba5121 +size 1064 diff --git a/run-r5dfh0r8/checkpoint-1260/trainer_state.json b/run-r5dfh0r8/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4ee90e08b3a66e4b5560a80631036587b2599e1f --- /dev/null +++ b/run-r5dfh0r8/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9215631929046563, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-r5dfh0r8/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 5.1317400832005625e-05, + "loss": 1.2464, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.886640798226164, + "eval_loss": 0.8996214866638184, + "eval_runtime": 6.8338, + "eval_samples_per_second": 527.963, + "eval_steps_per_second": 8.341, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00010263480166401125, + "loss": 0.882, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00015395220249601687, + "loss": 0.8135, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9035476718403548, + "eval_loss": 0.8239731192588806, + "eval_runtime": 7.3316, + "eval_samples_per_second": 492.115, + "eval_steps_per_second": 7.775, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0002052696033280225, + "loss": 0.8045, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8991130820399114, + "eval_loss": 0.8360968828201294, + "eval_runtime": 7.1131, + "eval_samples_per_second": 507.234, + "eval_steps_per_second": 8.013, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00025658700416002813, + "loss": 0.7946, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00030790440499203374, + "loss": 0.7879, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6962305986696231, + "eval_loss": 1.1449085474014282, + "eval_runtime": 6.9491, + "eval_samples_per_second": 519.205, + "eval_steps_per_second": 8.203, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00035922180582403934, + "loss": 0.7875, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.000410539206656045, + "loss": 0.7828, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.7009423503325942, + "eval_loss": 1.1168012619018555, + "eval_runtime": 6.9876, + "eval_samples_per_second": 516.34, + "eval_steps_per_second": 8.157, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0004559258068639709, + "loss": 0.7906, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8605875831485588, + "eval_loss": 0.9142499566078186, + "eval_runtime": 7.101, + "eval_samples_per_second": 508.097, + "eval_steps_per_second": 8.027, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0004550424242390144, + "loss": 0.7896, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0004527289743641841, + "loss": 0.7901, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.871119733924612, + "eval_loss": 0.8741796016693115, + "eval_runtime": 6.8366, + "eval_samples_per_second": 527.746, + "eval_steps_per_second": 8.337, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0004490000268428782, + "loss": 0.7888, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0004438790657743496, + "loss": 0.7717, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.878880266075388, + "eval_loss": 0.8548110723495483, + "eval_runtime": 7.0878, + "eval_samples_per_second": 509.042, + "eval_steps_per_second": 8.042, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00043739834185596474, + "loss": 0.7658, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8960643015521065, + "eval_loss": 0.8551378846168518, + "eval_runtime": 6.8986, + "eval_samples_per_second": 523.005, + "eval_steps_per_second": 8.263, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00042959866927533605, + "loss": 0.7742, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0004205291686714583, + "loss": 0.7601, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.893569844789357, + "eval_loss": 0.8507728576660156, + "eval_runtime": 6.7763, + "eval_samples_per_second": 532.44, + "eval_steps_per_second": 8.412, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0004102469577836241, + "loss": 0.7563, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8310302495956421, + "eval_runtime": 7.3272, + "eval_samples_per_second": 492.41, + "eval_steps_per_second": 7.779, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00039881679173634285, + "loss": 0.7512, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0003863106552256715, + "loss": 0.7422, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8894124168514412, + "eval_loss": 0.8518989086151123, + "eval_runtime": 6.7334, + "eval_samples_per_second": 535.837, + "eval_steps_per_second": 8.465, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00037280730917527865, + "loss": 0.7523, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0003583917947173025, + "loss": 0.738, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8938470066518847, + "eval_loss": 0.8551179766654968, + "eval_runtime": 6.8262, + "eval_samples_per_second": 528.555, + "eval_steps_per_second": 8.35, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00034315489762182466, + "loss": 0.7372, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8799889135254989, + "eval_loss": 0.8525415658950806, + "eval_runtime": 6.7253, + "eval_samples_per_second": 536.479, + "eval_steps_per_second": 8.475, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00032719257654786486, + "loss": 0.7441, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003106053587166481, + "loss": 0.726, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9007760532150776, + "eval_loss": 0.826567530632019, + "eval_runtime": 6.5233, + "eval_samples_per_second": 553.094, + "eval_steps_per_second": 8.738, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00029349770681306217, + "loss": 0.7308, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00027597736110242173, + "loss": 0.7263, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8772172949002217, + "eval_loss": 0.9000593423843384, + "eval_runtime": 6.7912, + "eval_samples_per_second": 531.278, + "eval_steps_per_second": 8.393, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00025815466090574633, + "loss": 0.7302, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9104767184035477, + "eval_loss": 0.8178864121437073, + "eval_runtime": 6.7525, + "eval_samples_per_second": 534.318, + "eval_steps_per_second": 8.441, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0002401418497067496, + "loss": 0.7236, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00022205236826682793, + "loss": 0.7158, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8212663531303406, + "eval_runtime": 7.0152, + "eval_samples_per_second": 514.315, + "eval_steps_per_second": 8.125, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00020400014019985495, + "loss": 0.7085, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00018609885450607592, + "loss": 0.7132, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9079822616407982, + "eval_loss": 0.8249314427375793, + "eval_runtime": 6.903, + "eval_samples_per_second": 522.672, + "eval_steps_per_second": 8.257, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00016846124958354788, + "loss": 0.7123, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.824931263923645, + "eval_runtime": 6.7493, + "eval_samples_per_second": 534.575, + "eval_steps_per_second": 8.445, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0001511984032262651, + "loss": 0.707, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.0001344190330804078, + "loss": 0.7026, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8245134353637695, + "eval_runtime": 6.9369, + "eval_samples_per_second": 520.114, + "eval_steps_per_second": 8.217, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00011822881196428688, + "loss": 0.6974, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9118625277161863, + "eval_loss": 0.8201560974121094, + "eval_runtime": 6.5274, + "eval_samples_per_second": 552.748, + "eval_steps_per_second": 8.732, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.00010272970236395156, + "loss": 0.7008, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 8.801931429566253e-05, + "loss": 0.6996, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8126341104507446, + "eval_runtime": 6.8301, + "eval_samples_per_second": 528.253, + "eval_steps_per_second": 8.345, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 7.419029057926998e-05, + "loss": 0.6982, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 6.132972339391367e-05, + "loss": 0.6936, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8118070363998413, + "eval_runtime": 6.7715, + "eval_samples_per_second": 532.818, + "eval_steps_per_second": 8.418, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 4.951860579044968e-05, + "loss": 0.6903, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8067470192909241, + "eval_runtime": 6.9673, + "eval_samples_per_second": 517.845, + "eval_steps_per_second": 8.181, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 3.883132161485732e-05, + "loss": 0.6938, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.9335177054979378e-05, + "loss": 0.6901, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8188725113868713, + "eval_runtime": 6.8809, + "eval_samples_per_second": 524.347, + "eval_steps_per_second": 8.284, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 2.108997676081227e-05, + "loss": 0.6905, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.41476472078462e-05, + "loss": 0.6946, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8086256980895996, + "eval_runtime": 6.6543, + "eval_samples_per_second": 542.207, + "eval_steps_per_second": 8.566, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 8.551909675433608e-06, + "loss": 0.6919, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8124069571495056, + "eval_runtime": 7.0393, + "eval_samples_per_second": 512.549, + "eval_steps_per_second": 8.097, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 4.338004899695059e-06, + "loss": 0.6849, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.5324711350390228e-06, + "loss": 0.6874, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.8066354990005493, + "eval_runtime": 6.9928, + "eval_samples_per_second": 515.959, + "eval_steps_per_second": 8.151, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 1.5297702201638808e-07, + "loss": 0.6908, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9215631929046563, + "eval_loss": 0.8013138771057129, + "eval_runtime": 6.9188, + "eval_samples_per_second": 521.474, + "eval_steps_per_second": 8.238, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0004559353689305115, + "metric": "eval/loss", + "warmup_ratio": 0.1828231281549339 + } +} diff --git a/run-r5dfh0r8/checkpoint-1260/training_args.bin b/run-r5dfh0r8/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b6f84fb65ee9c32f563ed2a276f97425c42632f1 --- /dev/null +++ b/run-r5dfh0r8/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd31ed613341ad2e41e786fc23621baa4e5f8d6de127baf1072b4ec472e9f9d0 +size 4792 diff --git a/run-r9dxcqqj/checkpoint-616/model.safetensors b/run-r9dxcqqj/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fbd7eefcab0d1943730c1f1f258b12bb135c34d0 --- /dev/null +++ b/run-r9dxcqqj/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a3d3e54deddc15290054d80f22a44486c28b440c6d06430a5ef93c73597d01c +size 198025308 diff --git a/run-r9dxcqqj/checkpoint-616/optimizer.pt b/run-r9dxcqqj/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5900bfaa57a5e2996af485579bd94e143142029d --- /dev/null +++ b/run-r9dxcqqj/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf12f64e60e30e12899f2e90f3411a1c62b096cfd515817271a2d5110e52e9c0 +size 395900602 diff --git a/run-r9dxcqqj/checkpoint-616/rng_state.pth b/run-r9dxcqqj/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-r9dxcqqj/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-r9dxcqqj/checkpoint-616/scheduler.pt b/run-r9dxcqqj/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ad5b7cd1db77155c8846830d630f2b46ef3aa95 --- /dev/null +++ b/run-r9dxcqqj/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13422500aed95b0ad74a09c545af72daab050b8b1db3173dbaacffff82d76c3a +size 1064 diff --git a/run-r9dxcqqj/checkpoint-616/trainer_state.json b/run-r9dxcqqj/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cf0ea987c3047c1a81b830a932cf8b5e192cf5ea --- /dev/null +++ b/run-r9dxcqqj/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.924351957466302, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-r9dxcqqj/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.325132725003175e-05, + "loss": 1.3782, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9606608748435974, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.0327, + "eval_samples_per_second": 449.167, + "eval_steps_per_second": 3.61, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 8.65026545000635e-05, + "loss": 0.9965, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00012975398175009523, + "loss": 0.8679, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.899390243902439, + "eval_f1": 0.887472976974382, + "eval_loss": 0.9356690049171448, + "eval_precision": 0.8859546502110112, + "eval_recall": 0.899390243902439, + "eval_runtime": 8.283, + "eval_samples_per_second": 435.59, + "eval_steps_per_second": 3.501, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.000173005309000127, + "loss": 0.8125, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9097618390083172, + "eval_loss": 0.8014576435089111, + "eval_precision": 0.9075232484913949, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.8903, + "eval_samples_per_second": 457.268, + "eval_steps_per_second": 3.675, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.00021625663625015874, + "loss": 0.7961, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00025950796350019046, + "loss": 0.7798, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8885809312638581, + "eval_f1": 0.8638637817585579, + "eval_loss": 0.8500309586524963, + "eval_precision": 0.8838224512533309, + "eval_recall": 0.8885809312638581, + "eval_runtime": 7.7085, + "eval_samples_per_second": 468.053, + "eval_steps_per_second": 3.762, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00030275929075022223, + "loss": 0.7902, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003158467455662601, + "loss": 0.7664, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8611419068736141, + "eval_f1": 0.8708867331292167, + "eval_loss": 0.8969948887825012, + "eval_precision": 0.8929015329803001, + "eval_recall": 0.8611419068736141, + "eval_runtime": 7.9239, + "eval_samples_per_second": 455.329, + "eval_steps_per_second": 3.66, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0003147504903599768, + "loss": 0.7777, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9016075388026608, + "eval_f1": 0.9015287566030401, + "eval_loss": 0.8210821151733398, + "eval_precision": 0.9030778012262036, + "eval_recall": 0.9016075388026608, + "eval_runtime": 7.9993, + "eval_samples_per_second": 451.041, + "eval_steps_per_second": 3.625, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0003127414185870322, + "loss": 0.7649, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00030983123233718757, + "loss": 0.7524, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9049345133786995, + "eval_loss": 0.8112013339996338, + "eval_precision": 0.9021847691187257, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.1909, + "eval_samples_per_second": 440.487, + "eval_steps_per_second": 3.54, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00030603688235415267, + "loss": 0.7472, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0003013804693038593, + "loss": 0.7407, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9060421286031042, + "eval_f1": 0.9005301603113456, + "eval_loss": 0.8247692584991455, + "eval_precision": 0.8988654794690789, + "eval_recall": 0.9060421286031042, + "eval_runtime": 8.142, + "eval_samples_per_second": 443.137, + "eval_steps_per_second": 3.562, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0002958891150463693, + "loss": 0.7341, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8786031042128604, + "eval_f1": 0.8805050956885992, + "eval_loss": 0.8544660806655884, + "eval_precision": 0.8892450837096133, + "eval_recall": 0.8786031042128604, + "eval_runtime": 7.6623, + "eval_samples_per_second": 470.874, + "eval_steps_per_second": 3.785, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00028959480466120914, + "loss": 0.734, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0002825342001462746, + "loss": 0.7355, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.8976370659053916, + "eval_loss": 0.8235156536102295, + "eval_precision": 0.8988403905300051, + "eval_recall": 0.9068736141906873, + "eval_runtime": 8.1626, + "eval_samples_per_second": 442.018, + "eval_steps_per_second": 3.553, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00027474842687543973, + "loss": 0.7277, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8758314855875832, + "eval_f1": 0.881482094338913, + "eval_loss": 0.8743150234222412, + "eval_precision": 0.8959003957384503, + "eval_recall": 0.8758314855875832, + "eval_runtime": 7.7881, + "eval_samples_per_second": 463.271, + "eval_steps_per_second": 3.724, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00026628283405867457, + "loss": 0.7346, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00025718673059990074, + "loss": 0.7224, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9096452328159645, + "eval_f1": 0.9065528657834572, + "eval_loss": 0.811353862285614, + "eval_precision": 0.904153985951234, + "eval_recall": 0.9096452328159645, + "eval_runtime": 8.0719, + "eval_samples_per_second": 446.983, + "eval_steps_per_second": 3.593, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00024751309789111507, + "loss": 0.7134, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00023731828121564689, + "loss": 0.7141, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9085365853658537, + "eval_f1": 0.9043905849529116, + "eval_loss": 0.8205472230911255, + "eval_precision": 0.9014134676910035, + "eval_recall": 0.9085365853658537, + "eval_runtime": 8.0487, + "eval_samples_per_second": 448.272, + "eval_steps_per_second": 3.603, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00022666166155800948, + "loss": 0.7136, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9101995565410199, + "eval_f1": 0.9036784801350952, + "eval_loss": 0.8140143156051636, + "eval_precision": 0.9007802943269283, + "eval_recall": 0.9101995565410199, + "eval_runtime": 7.9299, + "eval_samples_per_second": 454.985, + "eval_steps_per_second": 3.657, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00021560530973193124, + "loss": 0.7141, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00020421362484113897, + "loss": 0.7102, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9082594235033259, + "eval_f1": 0.8979517727467885, + "eval_loss": 0.8260918855667114, + "eval_precision": 0.898006223897894, + "eval_recall": 0.9082594235033259, + "eval_runtime": 8.0503, + "eval_samples_per_second": 448.181, + "eval_steps_per_second": 3.602, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00019255295917872583, + "loss": 0.703, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00018069123174992355, + "loss": 0.7102, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9061639274139788, + "eval_loss": 0.8162410259246826, + "eval_precision": 0.904766889299943, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.6596, + "eval_samples_per_second": 471.041, + "eval_steps_per_second": 3.786, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0001686975326693633, + "loss": 0.702, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9079822616407982, + "eval_f1": 0.9068388836587531, + "eval_loss": 0.8246458768844604, + "eval_precision": 0.9077694792541153, + "eval_recall": 0.9079822616407982, + "eval_runtime": 7.9232, + "eval_samples_per_second": 455.374, + "eval_steps_per_second": 3.66, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00015664172073706417, + "loss": 0.6981, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00014459401653711681, + "loss": 0.6986, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9098240627683561, + "eval_loss": 0.8136833310127258, + "eval_precision": 0.9073652093528346, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.0273, + "eval_samples_per_second": 449.465, + "eval_steps_per_second": 3.613, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00013262459342911108, + "loss": 0.6993, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00012080316881462824, + "loss": 0.7006, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9068508702261913, + "eval_loss": 0.8176115155220032, + "eval_precision": 0.9042737044962987, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.9487, + "eval_samples_per_second": 453.909, + "eval_steps_per_second": 3.648, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.00010919859805951805, + "loss": 0.6972, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.905945706396145, + "eval_loss": 0.8245837688446045, + "eval_precision": 0.9074692562771068, + "eval_recall": 0.9068736141906873, + "eval_runtime": 8.0925, + "eval_samples_per_second": 445.846, + "eval_steps_per_second": 3.584, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 9.787847343721023e-05, + "loss": 0.6975, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 8.690873042906623e-05, + "loss": 0.6932, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9110710956391731, + "eval_loss": 0.8100480437278748, + "eval_precision": 0.9107715781590288, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.1271, + "eval_samples_per_second": 443.946, + "eval_steps_per_second": 3.568, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 7.635326367492293e-05, + "loss": 0.693, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9103169414289777, + "eval_loss": 0.812038242816925, + "eval_precision": 0.9072172490083962, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.7899, + "eval_samples_per_second": 463.166, + "eval_steps_per_second": 3.723, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 6.627355481077376e-05, + "loss": 0.6942, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 5.672831436129182e-05, + "loss": 0.691, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9145523792603321, + "eval_loss": 0.8048750758171082, + "eval_precision": 0.9128329741840425, + "eval_recall": 0.9190687361419069, + "eval_runtime": 7.6855, + "eval_samples_per_second": 469.455, + "eval_steps_per_second": 3.773, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 4.777313977303747e-05, + "loss": 0.6915, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 3.946019158017908e-05, + "loss": 0.6876, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.911974079430732, + "eval_loss": 0.813626766204834, + "eval_precision": 0.9101817096300148, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.908, + "eval_samples_per_second": 456.244, + "eval_steps_per_second": 3.667, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 3.1837889588939344e-05, + "loss": 0.6896, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9142379941944394, + "eval_loss": 0.8089220523834229, + "eval_precision": 0.9115865629267781, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.8381, + "eval_samples_per_second": 460.317, + "eval_steps_per_second": 3.7, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 2.4950630850379195e-05, + "loss": 0.6843, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.8838531064222727e-05, + "loss": 0.6857, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9171451121201809, + "eval_loss": 0.8024230003356934, + "eval_precision": 0.9150678742214442, + "eval_recall": 0.9210088691796009, + "eval_runtime": 7.6853, + "eval_samples_per_second": 469.47, + "eval_steps_per_second": 3.773, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.3537190919947792e-05, + "loss": 0.6868, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 9.077488736118436e-06, + "loss": 0.6867, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9103411226447043, + "eval_loss": 0.8090687394142151, + "eval_precision": 0.9066728855656762, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.8333, + "eval_samples_per_second": 460.6, + "eval_steps_per_second": 3.702, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 5.485400605757511e-06, + "loss": 0.6858, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9126031903620105, + "eval_loss": 0.8064006567001343, + "eval_precision": 0.9088671768542353, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.8883, + "eval_samples_per_second": 457.384, + "eval_steps_per_second": 3.676, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 2.7818490953446472e-06, + "loss": 0.6884, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 9.825813787115542e-07, + "loss": 0.6879, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9279379157427938, + "eval_f1": 0.924351957466302, + "eval_loss": 0.7972683310508728, + "eval_precision": 0.9223103693564321, + "eval_recall": 0.9279379157427938, + "eval_runtime": 8.0856, + "eval_samples_per_second": 446.226, + "eval_steps_per_second": 3.587, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.3825006695314702, + "learning_rate": 0.0003160673914425397, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-r9dxcqqj/checkpoint-616/training_args.bin b/run-r9dxcqqj/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ed0aa3cc2679b0e9fd87c0e6a789c3258dbc863b --- /dev/null +++ b/run-r9dxcqqj/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df14c2278c9a6353b8fc74fdf3b6705085e77d9601ba12641795e4aa8839dc47 +size 4792 diff --git a/run-r9dxcqqj/checkpoint-630/model.safetensors b/run-r9dxcqqj/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a715fe3428895be633d99bfc2c9d6d696aa7a0fa --- /dev/null +++ b/run-r9dxcqqj/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91fb0164c9717d37151034330d5509cde9d83b15246ef439d3c8683a13bb2390 +size 198025308 diff --git a/run-r9dxcqqj/checkpoint-630/optimizer.pt b/run-r9dxcqqj/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4cb8349125599ae7ccdcf40faedf51c1cff3fd3b --- /dev/null +++ b/run-r9dxcqqj/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40af8984993de3d8d29d1a06a8ab8bff7f3f3d1f6590fc152c297000eb21a611 +size 395900602 diff --git a/run-r9dxcqqj/checkpoint-630/rng_state.pth b/run-r9dxcqqj/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-r9dxcqqj/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-r9dxcqqj/checkpoint-630/scheduler.pt b/run-r9dxcqqj/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e08044f46480dc1cdc09db11ce29051b2fedabb7 --- /dev/null +++ b/run-r9dxcqqj/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d1050fb4f408465406462dc4fcc90b6a8a833c022c6a257c0d27df0f072f128 +size 1064 diff --git a/run-r9dxcqqj/checkpoint-630/trainer_state.json b/run-r9dxcqqj/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2191379a21456cb1afcf5822816c04b09144eb98 --- /dev/null +++ b/run-r9dxcqqj/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.924351957466302, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-r9dxcqqj/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 4.325132725003175e-05, + "loss": 1.3782, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9606608748435974, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.0327, + "eval_samples_per_second": 449.167, + "eval_steps_per_second": 3.61, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 8.65026545000635e-05, + "loss": 0.9965, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00012975398175009523, + "loss": 0.8679, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.899390243902439, + "eval_f1": 0.887472976974382, + "eval_loss": 0.9356690049171448, + "eval_precision": 0.8859546502110112, + "eval_recall": 0.899390243902439, + "eval_runtime": 8.283, + "eval_samples_per_second": 435.59, + "eval_steps_per_second": 3.501, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.000173005309000127, + "loss": 0.8125, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9097618390083172, + "eval_loss": 0.8014576435089111, + "eval_precision": 0.9075232484913949, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.8903, + "eval_samples_per_second": 457.268, + "eval_steps_per_second": 3.675, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.00021625663625015874, + "loss": 0.7961, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00025950796350019046, + "loss": 0.7798, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8885809312638581, + "eval_f1": 0.8638637817585579, + "eval_loss": 0.8500309586524963, + "eval_precision": 0.8838224512533309, + "eval_recall": 0.8885809312638581, + "eval_runtime": 7.7085, + "eval_samples_per_second": 468.053, + "eval_steps_per_second": 3.762, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00030275929075022223, + "loss": 0.7902, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003158467455662601, + "loss": 0.7664, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8611419068736141, + "eval_f1": 0.8708867331292167, + "eval_loss": 0.8969948887825012, + "eval_precision": 0.8929015329803001, + "eval_recall": 0.8611419068736141, + "eval_runtime": 7.9239, + "eval_samples_per_second": 455.329, + "eval_steps_per_second": 3.66, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0003147504903599768, + "loss": 0.7777, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9016075388026608, + "eval_f1": 0.9015287566030401, + "eval_loss": 0.8210821151733398, + "eval_precision": 0.9030778012262036, + "eval_recall": 0.9016075388026608, + "eval_runtime": 7.9993, + "eval_samples_per_second": 451.041, + "eval_steps_per_second": 3.625, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0003127414185870322, + "loss": 0.7649, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00030983123233718757, + "loss": 0.7524, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9049345133786995, + "eval_loss": 0.8112013339996338, + "eval_precision": 0.9021847691187257, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.1909, + "eval_samples_per_second": 440.487, + "eval_steps_per_second": 3.54, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00030603688235415267, + "loss": 0.7472, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0003013804693038593, + "loss": 0.7407, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9060421286031042, + "eval_f1": 0.9005301603113456, + "eval_loss": 0.8247692584991455, + "eval_precision": 0.8988654794690789, + "eval_recall": 0.9060421286031042, + "eval_runtime": 8.142, + "eval_samples_per_second": 443.137, + "eval_steps_per_second": 3.562, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0002958891150463693, + "loss": 0.7341, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8786031042128604, + "eval_f1": 0.8805050956885992, + "eval_loss": 0.8544660806655884, + "eval_precision": 0.8892450837096133, + "eval_recall": 0.8786031042128604, + "eval_runtime": 7.6623, + "eval_samples_per_second": 470.874, + "eval_steps_per_second": 3.785, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00028959480466120914, + "loss": 0.734, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0002825342001462746, + "loss": 0.7355, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.8976370659053916, + "eval_loss": 0.8235156536102295, + "eval_precision": 0.8988403905300051, + "eval_recall": 0.9068736141906873, + "eval_runtime": 8.1626, + "eval_samples_per_second": 442.018, + "eval_steps_per_second": 3.553, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00027474842687543973, + "loss": 0.7277, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8758314855875832, + "eval_f1": 0.881482094338913, + "eval_loss": 0.8743150234222412, + "eval_precision": 0.8959003957384503, + "eval_recall": 0.8758314855875832, + "eval_runtime": 7.7881, + "eval_samples_per_second": 463.271, + "eval_steps_per_second": 3.724, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00026628283405867457, + "loss": 0.7346, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00025718673059990074, + "loss": 0.7224, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9096452328159645, + "eval_f1": 0.9065528657834572, + "eval_loss": 0.811353862285614, + "eval_precision": 0.904153985951234, + "eval_recall": 0.9096452328159645, + "eval_runtime": 8.0719, + "eval_samples_per_second": 446.983, + "eval_steps_per_second": 3.593, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00024751309789111507, + "loss": 0.7134, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00023731828121564689, + "loss": 0.7141, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9085365853658537, + "eval_f1": 0.9043905849529116, + "eval_loss": 0.8205472230911255, + "eval_precision": 0.9014134676910035, + "eval_recall": 0.9085365853658537, + "eval_runtime": 8.0487, + "eval_samples_per_second": 448.272, + "eval_steps_per_second": 3.603, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00022666166155800948, + "loss": 0.7136, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9101995565410199, + "eval_f1": 0.9036784801350952, + "eval_loss": 0.8140143156051636, + "eval_precision": 0.9007802943269283, + "eval_recall": 0.9101995565410199, + "eval_runtime": 7.9299, + "eval_samples_per_second": 454.985, + "eval_steps_per_second": 3.657, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00021560530973193124, + "loss": 0.7141, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00020421362484113897, + "loss": 0.7102, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9082594235033259, + "eval_f1": 0.8979517727467885, + "eval_loss": 0.8260918855667114, + "eval_precision": 0.898006223897894, + "eval_recall": 0.9082594235033259, + "eval_runtime": 8.0503, + "eval_samples_per_second": 448.181, + "eval_steps_per_second": 3.602, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00019255295917872583, + "loss": 0.703, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00018069123174992355, + "loss": 0.7102, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9061639274139788, + "eval_loss": 0.8162410259246826, + "eval_precision": 0.904766889299943, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.6596, + "eval_samples_per_second": 471.041, + "eval_steps_per_second": 3.786, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0001686975326693633, + "loss": 0.702, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9079822616407982, + "eval_f1": 0.9068388836587531, + "eval_loss": 0.8246458768844604, + "eval_precision": 0.9077694792541153, + "eval_recall": 0.9079822616407982, + "eval_runtime": 7.9232, + "eval_samples_per_second": 455.374, + "eval_steps_per_second": 3.66, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00015664172073706417, + "loss": 0.6981, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00014459401653711681, + "loss": 0.6986, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9098240627683561, + "eval_loss": 0.8136833310127258, + "eval_precision": 0.9073652093528346, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.0273, + "eval_samples_per_second": 449.465, + "eval_steps_per_second": 3.613, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00013262459342911108, + "loss": 0.6993, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00012080316881462824, + "loss": 0.7006, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9068508702261913, + "eval_loss": 0.8176115155220032, + "eval_precision": 0.9042737044962987, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.9487, + "eval_samples_per_second": 453.909, + "eval_steps_per_second": 3.648, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.00010919859805951805, + "loss": 0.6972, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.905945706396145, + "eval_loss": 0.8245837688446045, + "eval_precision": 0.9074692562771068, + "eval_recall": 0.9068736141906873, + "eval_runtime": 8.0925, + "eval_samples_per_second": 445.846, + "eval_steps_per_second": 3.584, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 9.787847343721023e-05, + "loss": 0.6975, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 8.690873042906623e-05, + "loss": 0.6932, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9110710956391731, + "eval_loss": 0.8100480437278748, + "eval_precision": 0.9107715781590288, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.1271, + "eval_samples_per_second": 443.946, + "eval_steps_per_second": 3.568, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 7.635326367492293e-05, + "loss": 0.693, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9103169414289777, + "eval_loss": 0.812038242816925, + "eval_precision": 0.9072172490083962, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.7899, + "eval_samples_per_second": 463.166, + "eval_steps_per_second": 3.723, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 6.627355481077376e-05, + "loss": 0.6942, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 5.672831436129182e-05, + "loss": 0.691, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9145523792603321, + "eval_loss": 0.8048750758171082, + "eval_precision": 0.9128329741840425, + "eval_recall": 0.9190687361419069, + "eval_runtime": 7.6855, + "eval_samples_per_second": 469.455, + "eval_steps_per_second": 3.773, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 4.777313977303747e-05, + "loss": 0.6915, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 3.946019158017908e-05, + "loss": 0.6876, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.911974079430732, + "eval_loss": 0.813626766204834, + "eval_precision": 0.9101817096300148, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.908, + "eval_samples_per_second": 456.244, + "eval_steps_per_second": 3.667, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 3.1837889588939344e-05, + "loss": 0.6896, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9142379941944394, + "eval_loss": 0.8089220523834229, + "eval_precision": 0.9115865629267781, + "eval_recall": 0.9176829268292683, + "eval_runtime": 7.8381, + "eval_samples_per_second": 460.317, + "eval_steps_per_second": 3.7, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 2.4950630850379195e-05, + "loss": 0.6843, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.8838531064222727e-05, + "loss": 0.6857, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9210088691796009, + "eval_f1": 0.9171451121201809, + "eval_loss": 0.8024230003356934, + "eval_precision": 0.9150678742214442, + "eval_recall": 0.9210088691796009, + "eval_runtime": 7.6853, + "eval_samples_per_second": 469.47, + "eval_steps_per_second": 3.773, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.3537190919947792e-05, + "loss": 0.6868, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 9.077488736118436e-06, + "loss": 0.6867, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9103411226447043, + "eval_loss": 0.8090687394142151, + "eval_precision": 0.9066728855656762, + "eval_recall": 0.9146341463414634, + "eval_runtime": 7.8333, + "eval_samples_per_second": 460.6, + "eval_steps_per_second": 3.702, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 5.485400605757511e-06, + "loss": 0.6858, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9126031903620105, + "eval_loss": 0.8064006567001343, + "eval_precision": 0.9088671768542353, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.8883, + "eval_samples_per_second": 457.384, + "eval_steps_per_second": 3.676, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 2.7818490953446472e-06, + "loss": 0.6884, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 9.825813787115542e-07, + "loss": 0.6879, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9279379157427938, + "eval_f1": 0.924351957466302, + "eval_loss": 0.7972683310508728, + "eval_precision": 0.9223103693564321, + "eval_recall": 0.9279379157427938, + "eval_runtime": 8.0856, + "eval_samples_per_second": 446.226, + "eval_steps_per_second": 3.587, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 9.807751565904419e-08, + "loss": 0.6834, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9185144124168514, + "eval_f1": 0.9143520661647251, + "eval_loss": 0.8109614253044128, + "eval_precision": 0.912354186742865, + "eval_recall": 0.9185144124168514, + "eval_runtime": 7.815, + "eval_samples_per_second": 461.676, + "eval_steps_per_second": 3.711, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.3825006695314702, + "learning_rate": 0.0003160673914425397, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-r9dxcqqj/checkpoint-630/training_args.bin b/run-r9dxcqqj/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ed0aa3cc2679b0e9fd87c0e6a789c3258dbc863b --- /dev/null +++ b/run-r9dxcqqj/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df14c2278c9a6353b8fc74fdf3b6705085e77d9601ba12641795e4aa8839dc47 +size 4792 diff --git a/run-rc8xrzwa/checkpoint-1232/model.safetensors b/run-rc8xrzwa/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..59254423ff1fa63859b05da43168ebcd6e8f8980 --- /dev/null +++ b/run-rc8xrzwa/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22d5baa10f9443f2e1591e59809d581752c0f92d6cd167e9f24be64d040ed363 +size 198025308 diff --git a/run-rc8xrzwa/checkpoint-1232/optimizer.pt b/run-rc8xrzwa/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..59f9ba14ae140d8fa04d7c14566d4e3f79099775 --- /dev/null +++ b/run-rc8xrzwa/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f76e054594a94d0663b9cb11928ea303379c87b0007ae2a8f6dae90472aa20c8 +size 395900602 diff --git a/run-rc8xrzwa/checkpoint-1232/rng_state.pth b/run-rc8xrzwa/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-rc8xrzwa/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-rc8xrzwa/checkpoint-1232/scheduler.pt b/run-rc8xrzwa/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2bc2ec794b19afa129298df434fd0ffa283e754e --- /dev/null +++ b/run-rc8xrzwa/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2a731cb03660a0941060c1a3151399900adbfb8378891d8ce7ca64e4697bb75 +size 1064 diff --git a/run-rc8xrzwa/checkpoint-1232/trainer_state.json b/run-rc8xrzwa/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a9d53ea98498e9b43c2f38152fde2fa41b4521fc --- /dev/null +++ b/run-rc8xrzwa/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9237804878048781, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-rc8xrzwa/checkpoint-1190", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.575567675218956e-05, + "loss": 1.3433, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8558758314855875, + "eval_loss": 0.9417877793312073, + "eval_runtime": 6.5958, + "eval_samples_per_second": 547.015, + "eval_steps_per_second": 8.642, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 5.151135350437912e-05, + "loss": 0.9416, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 7.726703025656867e-05, + "loss": 0.8474, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8186371922492981, + "eval_runtime": 7.1783, + "eval_samples_per_second": 502.627, + "eval_steps_per_second": 7.941, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010302270700875824, + "loss": 0.8109, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8158596158027649, + "eval_runtime": 6.7084, + "eval_samples_per_second": 537.835, + "eval_steps_per_second": 8.497, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001287783837609478, + "loss": 0.7951, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00015453406051313734, + "loss": 0.7835, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9049334811529933, + "eval_loss": 0.8325737714767456, + "eval_runtime": 6.9126, + "eval_samples_per_second": 521.942, + "eval_steps_per_second": 8.246, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00016935046366882469, + "loss": 0.7772, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00016891107842452967, + "loss": 0.7697, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8056278228759766, + "eval_runtime": 6.7409, + "eval_samples_per_second": 535.242, + "eval_steps_per_second": 8.456, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00016799813420346416, + "loss": 0.765, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9104767184035477, + "eval_loss": 0.8090510368347168, + "eval_runtime": 6.9363, + "eval_samples_per_second": 520.16, + "eval_steps_per_second": 8.218, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00016661676471535223, + "loss": 0.7549, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00016477473774031664, + "loss": 0.7479, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8072544932365417, + "eval_runtime": 6.8482, + "eval_samples_per_second": 526.856, + "eval_steps_per_second": 8.323, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00016248241144874233, + "loss": 0.751, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001597526761547386, + "loss": 0.7423, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8027817606925964, + "eval_runtime": 6.978, + "eval_samples_per_second": 517.056, + "eval_steps_per_second": 8.169, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0001566008818307348, + "loss": 0.733, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.81928551197052, + "eval_runtime": 6.5386, + "eval_samples_per_second": 551.803, + "eval_steps_per_second": 8.718, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.000153044751790813, + "loss": 0.735, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00014910428302815717, + "loss": 0.7285, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8058004975318909, + "eval_runtime": 6.6564, + "eval_samples_per_second": 542.036, + "eval_steps_per_second": 8.563, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00014480163376704585, + "loss": 0.7271, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8049086332321167, + "eval_runtime": 6.8815, + "eval_samples_per_second": 524.308, + "eval_steps_per_second": 8.283, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00014016099886171143, + "loss": 0.7237, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00013520847374272853, + "loss": 0.7194, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8057820796966553, + "eval_runtime": 6.9273, + "eval_samples_per_second": 520.84, + "eval_steps_per_second": 8.228, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00012997190767599458, + "loss": 0.7183, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00012448074715946326, + "loss": 0.7183, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8052031397819519, + "eval_runtime": 6.6594, + "eval_samples_per_second": 541.789, + "eval_steps_per_second": 8.559, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00011876587033824966, + "loss": 0.7096, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.905210643015521, + "eval_loss": 0.8224084973335266, + "eval_runtime": 6.8143, + "eval_samples_per_second": 529.473, + "eval_steps_per_second": 8.365, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0001128594133692317, + "loss": 0.7168, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00010679458971154177, + "loss": 0.7048, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.82257080078125, + "eval_runtime": 6.8795, + "eval_samples_per_second": 524.455, + "eval_steps_per_second": 8.285, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00010060550335912358, + "loss": 0.7102, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 9.432695706559268e-05, + "loss": 0.7041, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8133431673049927, + "eval_runtime": 6.8419, + "eval_samples_per_second": 527.341, + "eval_steps_per_second": 8.331, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 8.799425663979943e-05, + "loss": 0.705, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8141534328460693, + "eval_runtime": 6.5877, + "eval_samples_per_second": 547.688, + "eval_steps_per_second": 8.652, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 8.164301241258918e-05, + "loss": 0.6984, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 7.530893899115998e-05, + "loss": 0.7021, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8113471269607544, + "eval_runtime": 6.6207, + "eval_samples_per_second": 544.959, + "eval_steps_per_second": 8.609, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 6.902765442704897e-05, + "loss": 0.6988, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 6.283447992707507e-05, + "loss": 0.6975, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9104767184035477, + "eval_loss": 0.8156539797782898, + "eval_runtime": 6.8742, + "eval_samples_per_second": 524.86, + "eval_steps_per_second": 8.292, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 5.676424123351217e-05, + "loss": 0.7033, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8037310242652893, + "eval_runtime": 6.4458, + "eval_samples_per_second": 559.749, + "eval_steps_per_second": 8.843, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 5.085107279038151e-05, + "loss": 0.6945, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 4.5128225797084e-05, + "loss": 0.6921, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8072209358215332, + "eval_runtime": 6.7148, + "eval_samples_per_second": 537.325, + "eval_steps_per_second": 8.489, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 3.962788122873377e-05, + "loss": 0.6907, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8109329342842102, + "eval_runtime": 6.744, + "eval_samples_per_second": 534.997, + "eval_steps_per_second": 8.452, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 3.438096887462652e-05, + "loss": 0.6956, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 2.941699341243256e-05, + "loss": 0.6922, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.8046855330467224, + "eval_runtime": 6.8718, + "eval_samples_per_second": 525.041, + "eval_steps_per_second": 8.295, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 2.4763868496143275e-05, + "loss": 0.692, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 2.0447759790734464e-05, + "loss": 0.6902, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9221175166297118, + "eval_loss": 0.8009201884269714, + "eval_runtime": 6.8715, + "eval_samples_per_second": 525.07, + "eval_steps_per_second": 8.295, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.6492937836201666e-05, + "loss": 0.687, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9223946784922394, + "eval_loss": 0.8014901280403137, + "eval_runtime": 6.5784, + "eval_samples_per_second": 548.458, + "eval_steps_per_second": 8.665, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.2921641568348874e-05, + "loss": 0.6886, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 9.753953263786964e-06, + "loss": 0.6866, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9218403547671841, + "eval_loss": 0.804236650466919, + "eval_runtime": 6.9602, + "eval_samples_per_second": 518.379, + "eval_steps_per_second": 8.189, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 7.0076856123569005e-06, + "loss": 0.6867, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 4.698281551996917e-06, + "loss": 0.6913, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9235033259423503, + "eval_loss": 0.798970639705658, + "eval_runtime": 6.845, + "eval_samples_per_second": 527.098, + "eval_steps_per_second": 8.327, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.8387274293072607e-06, + "loss": 0.688, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9237804878048781, + "eval_loss": 0.801655650138855, + "eval_runtime": 6.3824, + "eval_samples_per_second": 565.303, + "eval_steps_per_second": 8.931, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.4394799741316684e-06, + "loss": 0.6865, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 5.084074987955168e-07, + "loss": 0.6851, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9226718403547672, + "eval_loss": 0.8011634349822998, + "eval_runtime": 6.5916, + "eval_samples_per_second": 547.362, + "eval_steps_per_second": 8.647, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00016939310479324672, + "metric": "eval/loss", + "warmup_ratio": 0.13517961452605504 + } +} diff --git a/run-rc8xrzwa/checkpoint-1232/training_args.bin b/run-rc8xrzwa/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a5a97fd93516d9d41489e5942ae1cc73501d46a --- /dev/null +++ b/run-rc8xrzwa/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:468eada5611f929bd71067ca8c966af333948a64ab2bb29e2d4cfa90b0354b5c +size 4792 diff --git a/run-rc8xrzwa/checkpoint-1260/model.safetensors b/run-rc8xrzwa/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3e2ce3c12702d830d356c1c3491e4face57abfd --- /dev/null +++ b/run-rc8xrzwa/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3f3b4e0ffa2850b4486dffa4c31e292dd635bfa4ce6c69b5b106007dd599430 +size 198025308 diff --git a/run-rc8xrzwa/checkpoint-1260/optimizer.pt b/run-rc8xrzwa/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ba8387a86a1f0fbbad95945b7004a7b511915f4 --- /dev/null +++ b/run-rc8xrzwa/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27bd2b0d39f6fe092cd2039156e6476c083f990df921aff7f0bf226ddd397d66 +size 395900602 diff --git a/run-rc8xrzwa/checkpoint-1260/rng_state.pth b/run-rc8xrzwa/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-rc8xrzwa/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-rc8xrzwa/checkpoint-1260/scheduler.pt b/run-rc8xrzwa/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e21fd9f0963019f72aed0431fc384197616a6087 --- /dev/null +++ b/run-rc8xrzwa/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5923e6dfce0e2bd7ba0efb5eb011f1c4fd4ac16f25beb85d912b38f458c21db6 +size 1064 diff --git a/run-rc8xrzwa/checkpoint-1260/trainer_state.json b/run-rc8xrzwa/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9f364a3737cbe63462720210b0f7b135bcda0dc8 --- /dev/null +++ b/run-rc8xrzwa/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9279379157427938, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-rc8xrzwa/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.575567675218956e-05, + "loss": 1.3433, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8558758314855875, + "eval_loss": 0.9417877793312073, + "eval_runtime": 6.5958, + "eval_samples_per_second": 547.015, + "eval_steps_per_second": 8.642, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 5.151135350437912e-05, + "loss": 0.9416, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 7.726703025656867e-05, + "loss": 0.8474, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8186371922492981, + "eval_runtime": 7.1783, + "eval_samples_per_second": 502.627, + "eval_steps_per_second": 7.941, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010302270700875824, + "loss": 0.8109, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8158596158027649, + "eval_runtime": 6.7084, + "eval_samples_per_second": 537.835, + "eval_steps_per_second": 8.497, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001287783837609478, + "loss": 0.7951, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00015453406051313734, + "loss": 0.7835, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9049334811529933, + "eval_loss": 0.8325737714767456, + "eval_runtime": 6.9126, + "eval_samples_per_second": 521.942, + "eval_steps_per_second": 8.246, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00016935046366882469, + "loss": 0.7772, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00016891107842452967, + "loss": 0.7697, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8056278228759766, + "eval_runtime": 6.7409, + "eval_samples_per_second": 535.242, + "eval_steps_per_second": 8.456, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00016799813420346416, + "loss": 0.765, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9104767184035477, + "eval_loss": 0.8090510368347168, + "eval_runtime": 6.9363, + "eval_samples_per_second": 520.16, + "eval_steps_per_second": 8.218, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00016661676471535223, + "loss": 0.7549, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00016477473774031664, + "loss": 0.7479, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8072544932365417, + "eval_runtime": 6.8482, + "eval_samples_per_second": 526.856, + "eval_steps_per_second": 8.323, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00016248241144874233, + "loss": 0.751, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001597526761547386, + "loss": 0.7423, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8027817606925964, + "eval_runtime": 6.978, + "eval_samples_per_second": 517.056, + "eval_steps_per_second": 8.169, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0001566008818307348, + "loss": 0.733, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.81928551197052, + "eval_runtime": 6.5386, + "eval_samples_per_second": 551.803, + "eval_steps_per_second": 8.718, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.000153044751790813, + "loss": 0.735, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00014910428302815717, + "loss": 0.7285, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8058004975318909, + "eval_runtime": 6.6564, + "eval_samples_per_second": 542.036, + "eval_steps_per_second": 8.563, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00014480163376704585, + "loss": 0.7271, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8049086332321167, + "eval_runtime": 6.8815, + "eval_samples_per_second": 524.308, + "eval_steps_per_second": 8.283, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00014016099886171143, + "loss": 0.7237, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00013520847374272853, + "loss": 0.7194, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8057820796966553, + "eval_runtime": 6.9273, + "eval_samples_per_second": 520.84, + "eval_steps_per_second": 8.228, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00012997190767599458, + "loss": 0.7183, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00012448074715946326, + "loss": 0.7183, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8052031397819519, + "eval_runtime": 6.6594, + "eval_samples_per_second": 541.789, + "eval_steps_per_second": 8.559, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00011876587033824966, + "loss": 0.7096, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.905210643015521, + "eval_loss": 0.8224084973335266, + "eval_runtime": 6.8143, + "eval_samples_per_second": 529.473, + "eval_steps_per_second": 8.365, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0001128594133692317, + "loss": 0.7168, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00010679458971154177, + "loss": 0.7048, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.82257080078125, + "eval_runtime": 6.8795, + "eval_samples_per_second": 524.455, + "eval_steps_per_second": 8.285, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00010060550335912358, + "loss": 0.7102, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 9.432695706559268e-05, + "loss": 0.7041, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8133431673049927, + "eval_runtime": 6.8419, + "eval_samples_per_second": 527.341, + "eval_steps_per_second": 8.331, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 8.799425663979943e-05, + "loss": 0.705, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8141534328460693, + "eval_runtime": 6.5877, + "eval_samples_per_second": 547.688, + "eval_steps_per_second": 8.652, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 8.164301241258918e-05, + "loss": 0.6984, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 7.530893899115998e-05, + "loss": 0.7021, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8113471269607544, + "eval_runtime": 6.6207, + "eval_samples_per_second": 544.959, + "eval_steps_per_second": 8.609, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 6.902765442704897e-05, + "loss": 0.6988, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 6.283447992707507e-05, + "loss": 0.6975, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9104767184035477, + "eval_loss": 0.8156539797782898, + "eval_runtime": 6.8742, + "eval_samples_per_second": 524.86, + "eval_steps_per_second": 8.292, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 5.676424123351217e-05, + "loss": 0.7033, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8037310242652893, + "eval_runtime": 6.4458, + "eval_samples_per_second": 559.749, + "eval_steps_per_second": 8.843, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 5.085107279038151e-05, + "loss": 0.6945, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 4.5128225797084e-05, + "loss": 0.6921, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8072209358215332, + "eval_runtime": 6.7148, + "eval_samples_per_second": 537.325, + "eval_steps_per_second": 8.489, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 3.962788122873377e-05, + "loss": 0.6907, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8109329342842102, + "eval_runtime": 6.744, + "eval_samples_per_second": 534.997, + "eval_steps_per_second": 8.452, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 3.438096887462652e-05, + "loss": 0.6956, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 2.941699341243256e-05, + "loss": 0.6922, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.8046855330467224, + "eval_runtime": 6.8718, + "eval_samples_per_second": 525.041, + "eval_steps_per_second": 8.295, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 2.4763868496143275e-05, + "loss": 0.692, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 2.0447759790734464e-05, + "loss": 0.6902, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9221175166297118, + "eval_loss": 0.8009201884269714, + "eval_runtime": 6.8715, + "eval_samples_per_second": 525.07, + "eval_steps_per_second": 8.295, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.6492937836201666e-05, + "loss": 0.687, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9223946784922394, + "eval_loss": 0.8014901280403137, + "eval_runtime": 6.5784, + "eval_samples_per_second": 548.458, + "eval_steps_per_second": 8.665, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.2921641568348874e-05, + "loss": 0.6886, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 9.753953263786964e-06, + "loss": 0.6866, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9218403547671841, + "eval_loss": 0.804236650466919, + "eval_runtime": 6.9602, + "eval_samples_per_second": 518.379, + "eval_steps_per_second": 8.189, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 7.0076856123569005e-06, + "loss": 0.6867, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 4.698281551996917e-06, + "loss": 0.6913, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9235033259423503, + "eval_loss": 0.798970639705658, + "eval_runtime": 6.845, + "eval_samples_per_second": 527.098, + "eval_steps_per_second": 8.327, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.8387274293072607e-06, + "loss": 0.688, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9237804878048781, + "eval_loss": 0.801655650138855, + "eval_runtime": 6.3824, + "eval_samples_per_second": 565.303, + "eval_steps_per_second": 8.931, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.4394799741316684e-06, + "loss": 0.6865, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 5.084074987955168e-07, + "loss": 0.6851, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9226718403547672, + "eval_loss": 0.8011634349822998, + "eval_runtime": 6.5916, + "eval_samples_per_second": 547.362, + "eval_steps_per_second": 8.647, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 5.074565265189852e-08, + "loss": 0.6887, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9279379157427938, + "eval_loss": 0.7936830520629883, + "eval_runtime": 6.9425, + "eval_samples_per_second": 519.696, + "eval_steps_per_second": 8.21, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00016939310479324672, + "metric": "eval/loss", + "warmup_ratio": 0.13517961452605504 + } +} diff --git a/run-rc8xrzwa/checkpoint-1260/training_args.bin b/run-rc8xrzwa/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a5a97fd93516d9d41489e5942ae1cc73501d46a --- /dev/null +++ b/run-rc8xrzwa/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:468eada5611f929bd71067ca8c966af333948a64ab2bb29e2d4cfa90b0354b5c +size 4792 diff --git a/run-rfjnleiw/checkpoint-1190/model.safetensors b/run-rfjnleiw/checkpoint-1190/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..390fd2f39ebc7d0506b367483a59ae03a79954ca --- /dev/null +++ b/run-rfjnleiw/checkpoint-1190/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b461046bc539b6457272ea947640c9eaa780e2355508fda65927689a0d27342d +size 198025308 diff --git a/run-rfjnleiw/checkpoint-1190/optimizer.pt b/run-rfjnleiw/checkpoint-1190/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c8276c61b20014b0e48caba0f0a1c4dfaba9434 --- /dev/null +++ b/run-rfjnleiw/checkpoint-1190/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01e993f4fa592881119139c9e26d335aff2a70cdbf23cda234be13b89e161adf +size 395900602 diff --git a/run-rfjnleiw/checkpoint-1190/rng_state.pth b/run-rfjnleiw/checkpoint-1190/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fa10329c52a02715f873c9a50812feb7d32c8cd3 --- /dev/null +++ b/run-rfjnleiw/checkpoint-1190/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43f5febab37757cc5268c77056c937c9c526090d892464a785cf2004d48e5d85 +size 14244 diff --git a/run-rfjnleiw/checkpoint-1190/scheduler.pt b/run-rfjnleiw/checkpoint-1190/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd4b135820cd2c562d2fcc288f7838977d56bda8 --- /dev/null +++ b/run-rfjnleiw/checkpoint-1190/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34f7773e653f770ef9099facea8eb2c7ead910b81267ff299bea09feb1efe848 +size 1064 diff --git a/run-rfjnleiw/checkpoint-1190/trainer_state.json b/run-rfjnleiw/checkpoint-1190/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..60373c762171fb62f11e576d8b9c534a7bf676a6 --- /dev/null +++ b/run-rfjnleiw/checkpoint-1190/trainer_state.json @@ -0,0 +1,549 @@ +{ + "best_metric": 0.9193458980044346, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-rfjnleiw/checkpoint-1190", + "epoch": 28.0, + "eval_steps": 500, + "global_step": 1190, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 7.582392904596617e-05, + "loss": 1.2019, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8977272727272727, + "eval_loss": 0.9365694522857666, + "eval_runtime": 6.8275, + "eval_samples_per_second": 528.451, + "eval_steps_per_second": 8.349, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015164785809193234, + "loss": 0.856, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00022747178713789846, + "loss": 0.8076, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8968957871396895, + "eval_loss": 0.8357753753662109, + "eval_runtime": 6.793, + "eval_samples_per_second": 531.134, + "eval_steps_per_second": 8.391, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0003032957161838647, + "loss": 0.7987, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8996674057649667, + "eval_loss": 0.8387001752853394, + "eval_runtime": 6.7138, + "eval_samples_per_second": 537.404, + "eval_steps_per_second": 8.49, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003791196452298308, + "loss": 0.8041, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004549435742757969, + "loss": 0.7932, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.31929046563192903, + "eval_loss": 1.701465129852295, + "eval_runtime": 6.7673, + "eval_samples_per_second": 533.15, + "eval_steps_per_second": 8.423, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0005307675033217631, + "loss": 0.7991, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0006065914323677294, + "loss": 0.8138, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8483924611973392, + "eval_loss": 0.919805645942688, + "eval_runtime": 6.8595, + "eval_samples_per_second": 525.989, + "eval_steps_per_second": 8.31, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0006824153614136955, + "loss": 0.8102, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8306541019955654, + "eval_loss": 1.0048964023590088, + "eval_runtime": 6.3842, + "eval_samples_per_second": 565.143, + "eval_steps_per_second": 8.928, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0007582392904596616, + "loss": 0.816, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0008340632195056278, + "loss": 0.8344, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8569844789356984, + "eval_loss": 0.8949978351593018, + "eval_runtime": 6.7672, + "eval_samples_per_second": 533.157, + "eval_steps_per_second": 8.423, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0009040446520564186, + "loss": 0.8535, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0009021181482198292, + "loss": 0.8258, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8317627494456763, + "eval_loss": 0.9528336524963379, + "eval_runtime": 6.9199, + "eval_samples_per_second": 521.393, + "eval_steps_per_second": 8.237, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0008968663363771482, + "loss": 0.8245, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.845620842572062, + "eval_loss": 0.9468299150466919, + "eval_runtime": 6.6043, + "eval_samples_per_second": 546.313, + "eval_steps_per_second": 8.631, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0008883280173412453, + "loss": 0.8324, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0008765662729073294, + "loss": 0.8247, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.875, + "eval_loss": 0.8675280213356018, + "eval_runtime": 6.7765, + "eval_samples_per_second": 532.428, + "eval_steps_per_second": 8.411, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0008616679997995171, + "loss": 0.8185, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8575388026607539, + "eval_loss": 0.8937786817550659, + "eval_runtime": 6.6865, + "eval_samples_per_second": 539.597, + "eval_steps_per_second": 8.525, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0008437432676707697, + "loss": 0.8205, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0008229245058993482, + "loss": 0.8153, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.863359201773836, + "eval_loss": 0.8942385315895081, + "eval_runtime": 6.8631, + "eval_samples_per_second": 525.706, + "eval_steps_per_second": 8.305, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.000799365525189801, + "loss": 0.8115, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0007732403812069726, + "loss": 0.8087, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.883869179600887, + "eval_loss": 0.8573442101478577, + "eval_runtime": 6.7996, + "eval_samples_per_second": 530.62, + "eval_steps_per_second": 8.383, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0007447420886386022, + "loss": 0.7913, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8373059866962306, + "eval_loss": 0.9251670241355896, + "eval_runtime": 6.8857, + "eval_samples_per_second": 523.987, + "eval_steps_per_second": 8.278, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0007140811951871172, + "loss": 0.8138, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0006814842260260909, + "loss": 0.7897, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8888580931263859, + "eval_loss": 0.8425456881523132, + "eval_runtime": 6.6689, + "eval_samples_per_second": 541.02, + "eval_steps_per_second": 8.547, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0006471920102138424, + "loss": 0.7983, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.000611457901428776, + "loss": 0.7866, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8827605321507761, + "eval_loss": 0.8681816458702087, + "eval_runtime": 6.7107, + "eval_samples_per_second": 537.652, + "eval_steps_per_second": 8.494, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0005745459061718049, + "loss": 0.7851, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9007760532150776, + "eval_loss": 0.8252034783363342, + "eval_runtime": 6.6425, + "eval_samples_per_second": 543.173, + "eval_steps_per_second": 8.581, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0005367287332648605, + "loss": 0.7732, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0004982857790559438, + "loss": 0.7699, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9004988913525499, + "eval_loss": 0.8350561857223511, + "eval_runtime": 6.716, + "eval_samples_per_second": 537.221, + "eval_steps_per_second": 8.487, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0004595010632161912, + "loss": 0.7674, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.0004206611303794529, + "loss": 0.7578, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9013303769401331, + "eval_loss": 0.8320653438568115, + "eval_runtime": 6.7319, + "eval_samples_per_second": 535.952, + "eval_steps_per_second": 8.467, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0003820529331272425, + "loss": 0.7602, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8930155210643016, + "eval_loss": 0.836357831954956, + "eval_runtime": 6.6858, + "eval_samples_per_second": 539.65, + "eval_steps_per_second": 8.526, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0003439617119597344, + "loss": 0.749, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.0003066688879157591, + "loss": 0.7438, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8286829590797424, + "eval_runtime": 7.0901, + "eval_samples_per_second": 508.877, + "eval_steps_per_second": 8.039, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.0002704499834112876, + "loss": 0.7397, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9099223946784922, + "eval_loss": 0.8207115530967712, + "eval_runtime": 6.5342, + "eval_samples_per_second": 552.17, + "eval_steps_per_second": 8.723, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.00023557258665742286, + "loss": 0.7404, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.0002022943746969412, + "loss": 0.7341, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8180325627326965, + "eval_runtime": 6.464, + "eval_samples_per_second": 558.169, + "eval_steps_per_second": 8.818, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.00017086120966534996, + "loss": 0.7306, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.00014150532234144208, + "loss": 0.7208, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9060421286031042, + "eval_loss": 0.8217902183532715, + "eval_runtime": 6.8269, + "eval_samples_per_second": 528.495, + "eval_steps_per_second": 8.349, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 0.00011444359640742261, + "loss": 0.7181, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8162592053413391, + "eval_runtime": 6.632, + "eval_samples_per_second": 544.031, + "eval_steps_per_second": 8.595, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 8.987596609463e-05, + "loss": 0.7161, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 6.798393905318233e-05, + "loss": 0.7138, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9088137472283814, + "eval_loss": 0.8213950991630554, + "eval_runtime": 6.7014, + "eval_samples_per_second": 538.395, + "eval_steps_per_second": 8.506, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 4.892925535869978e-05, + "loss": 0.7078, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.28526925634771e-05, + "loss": 0.7137, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8117570877075195, + "eval_runtime": 6.8191, + "eval_samples_per_second": 529.1, + "eval_steps_per_second": 8.359, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.987302562048209e-05, + "loss": 0.7098, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.8080064654350281, + "eval_runtime": 6.7854, + "eval_samples_per_second": 531.728, + "eval_steps_per_second": 8.4, + "step": 1190 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0009040545386249812, + "metric": "eval/loss", + "warmup_ratio": 0.245263723255476 + } +} diff --git a/run-rfjnleiw/checkpoint-1190/training_args.bin b/run-rfjnleiw/checkpoint-1190/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e186e2112fd7fc252a34ed3ab90a4e92ca51033 --- /dev/null +++ b/run-rfjnleiw/checkpoint-1190/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fddc756afc24aa7f38a5b5250f75da5f4430e22524c123389a76c5153b07761d +size 4792 diff --git a/run-rfjnleiw/checkpoint-1260/model.safetensors b/run-rfjnleiw/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..350d8f7f5cdb4dc59672c5f14bc6ed4d5de193ae --- /dev/null +++ b/run-rfjnleiw/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e170fc47480b1add9192f9a906ca576217099b78264eb80e02a1bb695dbc1156 +size 198025308 diff --git a/run-rfjnleiw/checkpoint-1260/optimizer.pt b/run-rfjnleiw/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..816c691b44de82e74b613c687f6fc30abd893488 --- /dev/null +++ b/run-rfjnleiw/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faef2cdd3c8526217f844c44a4cc0dafb5941ab2d84f4ee857c47f995d71ae65 +size 395900602 diff --git a/run-rfjnleiw/checkpoint-1260/rng_state.pth b/run-rfjnleiw/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-rfjnleiw/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-rfjnleiw/checkpoint-1260/scheduler.pt b/run-rfjnleiw/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5cece3caf41ad878a092f55e8e8d1c2fd7216fe --- /dev/null +++ b/run-rfjnleiw/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec8dd9563d3dd3253b1deeae041b0c3424538a2583f855f12629390e1079cbff +size 1064 diff --git a/run-rfjnleiw/checkpoint-1260/trainer_state.json b/run-rfjnleiw/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e9fa9f27cca053e3e27d1fbfdbe11bedcb41c96e --- /dev/null +++ b/run-rfjnleiw/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9193458980044346, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-rfjnleiw/checkpoint-1190", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 7.582392904596617e-05, + "loss": 1.2019, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8977272727272727, + "eval_loss": 0.9365694522857666, + "eval_runtime": 6.8275, + "eval_samples_per_second": 528.451, + "eval_steps_per_second": 8.349, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015164785809193234, + "loss": 0.856, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00022747178713789846, + "loss": 0.8076, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8968957871396895, + "eval_loss": 0.8357753753662109, + "eval_runtime": 6.793, + "eval_samples_per_second": 531.134, + "eval_steps_per_second": 8.391, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0003032957161838647, + "loss": 0.7987, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8996674057649667, + "eval_loss": 0.8387001752853394, + "eval_runtime": 6.7138, + "eval_samples_per_second": 537.404, + "eval_steps_per_second": 8.49, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003791196452298308, + "loss": 0.8041, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004549435742757969, + "loss": 0.7932, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.31929046563192903, + "eval_loss": 1.701465129852295, + "eval_runtime": 6.7673, + "eval_samples_per_second": 533.15, + "eval_steps_per_second": 8.423, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0005307675033217631, + "loss": 0.7991, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0006065914323677294, + "loss": 0.8138, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8483924611973392, + "eval_loss": 0.919805645942688, + "eval_runtime": 6.8595, + "eval_samples_per_second": 525.989, + "eval_steps_per_second": 8.31, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0006824153614136955, + "loss": 0.8102, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8306541019955654, + "eval_loss": 1.0048964023590088, + "eval_runtime": 6.3842, + "eval_samples_per_second": 565.143, + "eval_steps_per_second": 8.928, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0007582392904596616, + "loss": 0.816, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0008340632195056278, + "loss": 0.8344, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8569844789356984, + "eval_loss": 0.8949978351593018, + "eval_runtime": 6.7672, + "eval_samples_per_second": 533.157, + "eval_steps_per_second": 8.423, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0009040446520564186, + "loss": 0.8535, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0009021181482198292, + "loss": 0.8258, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8317627494456763, + "eval_loss": 0.9528336524963379, + "eval_runtime": 6.9199, + "eval_samples_per_second": 521.393, + "eval_steps_per_second": 8.237, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0008968663363771482, + "loss": 0.8245, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.845620842572062, + "eval_loss": 0.9468299150466919, + "eval_runtime": 6.6043, + "eval_samples_per_second": 546.313, + "eval_steps_per_second": 8.631, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0008883280173412453, + "loss": 0.8324, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0008765662729073294, + "loss": 0.8247, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.875, + "eval_loss": 0.8675280213356018, + "eval_runtime": 6.7765, + "eval_samples_per_second": 532.428, + "eval_steps_per_second": 8.411, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0008616679997995171, + "loss": 0.8185, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8575388026607539, + "eval_loss": 0.8937786817550659, + "eval_runtime": 6.6865, + "eval_samples_per_second": 539.597, + "eval_steps_per_second": 8.525, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0008437432676707697, + "loss": 0.8205, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0008229245058993482, + "loss": 0.8153, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.863359201773836, + "eval_loss": 0.8942385315895081, + "eval_runtime": 6.8631, + "eval_samples_per_second": 525.706, + "eval_steps_per_second": 8.305, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.000799365525189801, + "loss": 0.8115, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0007732403812069726, + "loss": 0.8087, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.883869179600887, + "eval_loss": 0.8573442101478577, + "eval_runtime": 6.7996, + "eval_samples_per_second": 530.62, + "eval_steps_per_second": 8.383, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0007447420886386022, + "loss": 0.7913, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8373059866962306, + "eval_loss": 0.9251670241355896, + "eval_runtime": 6.8857, + "eval_samples_per_second": 523.987, + "eval_steps_per_second": 8.278, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0007140811951871172, + "loss": 0.8138, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0006814842260260909, + "loss": 0.7897, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8888580931263859, + "eval_loss": 0.8425456881523132, + "eval_runtime": 6.6689, + "eval_samples_per_second": 541.02, + "eval_steps_per_second": 8.547, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0006471920102138424, + "loss": 0.7983, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.000611457901428776, + "loss": 0.7866, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8827605321507761, + "eval_loss": 0.8681816458702087, + "eval_runtime": 6.7107, + "eval_samples_per_second": 537.652, + "eval_steps_per_second": 8.494, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0005745459061718049, + "loss": 0.7851, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9007760532150776, + "eval_loss": 0.8252034783363342, + "eval_runtime": 6.6425, + "eval_samples_per_second": 543.173, + "eval_steps_per_second": 8.581, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0005367287332648605, + "loss": 0.7732, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0004982857790559438, + "loss": 0.7699, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9004988913525499, + "eval_loss": 0.8350561857223511, + "eval_runtime": 6.716, + "eval_samples_per_second": 537.221, + "eval_steps_per_second": 8.487, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0004595010632161912, + "loss": 0.7674, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.0004206611303794529, + "loss": 0.7578, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9013303769401331, + "eval_loss": 0.8320653438568115, + "eval_runtime": 6.7319, + "eval_samples_per_second": 535.952, + "eval_steps_per_second": 8.467, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0003820529331272425, + "loss": 0.7602, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8930155210643016, + "eval_loss": 0.836357831954956, + "eval_runtime": 6.6858, + "eval_samples_per_second": 539.65, + "eval_steps_per_second": 8.526, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0003439617119597344, + "loss": 0.749, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.0003066688879157591, + "loss": 0.7438, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8286829590797424, + "eval_runtime": 7.0901, + "eval_samples_per_second": 508.877, + "eval_steps_per_second": 8.039, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.0002704499834112876, + "loss": 0.7397, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9099223946784922, + "eval_loss": 0.8207115530967712, + "eval_runtime": 6.5342, + "eval_samples_per_second": 552.17, + "eval_steps_per_second": 8.723, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.00023557258665742286, + "loss": 0.7404, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.0002022943746969412, + "loss": 0.7341, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8180325627326965, + "eval_runtime": 6.464, + "eval_samples_per_second": 558.169, + "eval_steps_per_second": 8.818, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.00017086120966534996, + "loss": 0.7306, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.00014150532234144208, + "loss": 0.7208, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9060421286031042, + "eval_loss": 0.8217902183532715, + "eval_runtime": 6.8269, + "eval_samples_per_second": 528.495, + "eval_steps_per_second": 8.349, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 0.00011444359640742261, + "loss": 0.7181, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8162592053413391, + "eval_runtime": 6.632, + "eval_samples_per_second": 544.031, + "eval_steps_per_second": 8.595, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 8.987596609463e-05, + "loss": 0.7161, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 6.798393905318233e-05, + "loss": 0.7138, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9088137472283814, + "eval_loss": 0.8213950991630554, + "eval_runtime": 6.7014, + "eval_samples_per_second": 538.395, + "eval_steps_per_second": 8.506, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 4.892925535869978e-05, + "loss": 0.7078, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.28526925634771e-05, + "loss": 0.7137, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8117570877075195, + "eval_runtime": 6.8191, + "eval_samples_per_second": 529.1, + "eval_steps_per_second": 8.359, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.987302562048209e-05, + "loss": 0.7098, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.8080064654350281, + "eval_runtime": 6.7854, + "eval_samples_per_second": 531.728, + "eval_steps_per_second": 8.4, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.0086149364346142e-05, + "loss": 0.7073, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 3.5643700325299875e-06, + "loss": 0.7045, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8092620372772217, + "eval_runtime": 6.9491, + "eval_samples_per_second": 519.204, + "eval_steps_per_second": 8.203, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 3.558710609624814e-07, + "loss": 0.7068, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8106182217597961, + "eval_runtime": 6.9993, + "eval_samples_per_second": 515.479, + "eval_steps_per_second": 8.144, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0009040545386249812, + "metric": "eval/loss", + "warmup_ratio": 0.245263723255476 + } +} diff --git a/run-rfjnleiw/checkpoint-1260/training_args.bin b/run-rfjnleiw/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e186e2112fd7fc252a34ed3ab90a4e92ca51033 --- /dev/null +++ b/run-rfjnleiw/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fddc756afc24aa7f38a5b5250f75da5f4430e22524c123389a76c5153b07761d +size 4792 diff --git a/run-s0sh2ul4/checkpoint-1232/model.safetensors b/run-s0sh2ul4/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..270b19647dc2911f3d4222628e85fa2b29b7b073 --- /dev/null +++ b/run-s0sh2ul4/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ad37f4e64a545463cb89e032eb96786ceff3b0ecb3d6ebf0dc9bf4ef7dca535 +size 198025308 diff --git a/run-s0sh2ul4/checkpoint-1232/optimizer.pt b/run-s0sh2ul4/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..426ad9326e4e6ce02a3fe1000d3a0a3bb3615860 --- /dev/null +++ b/run-s0sh2ul4/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d00679553356f0c4caac0fea4b76d16f1dd1aeb3918ef08d2fd148b8ecc55fc +size 395900602 diff --git a/run-s0sh2ul4/checkpoint-1232/rng_state.pth b/run-s0sh2ul4/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-s0sh2ul4/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-s0sh2ul4/checkpoint-1232/scheduler.pt b/run-s0sh2ul4/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f92e6c4d6736ec683fe1881d445bcfd31d7780bd --- /dev/null +++ b/run-s0sh2ul4/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69385cdf330e466c5ec9a68d88d47182c0ec650955687734391a65c6a0595103 +size 1064 diff --git a/run-s0sh2ul4/checkpoint-1232/trainer_state.json b/run-s0sh2ul4/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0d254a45cca87f483773fda03bc5fd373ed919cd --- /dev/null +++ b/run-s0sh2ul4/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9232261640798226, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-s0sh2ul4/checkpoint-1232", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.511241045566805e-05, + "loss": 1.3441, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.854490022172949, + "eval_loss": 0.9356087446212769, + "eval_runtime": 6.7168, + "eval_samples_per_second": 537.158, + "eval_steps_per_second": 8.486, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 5.02248209113361e-05, + "loss": 0.938, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 7.533723136700416e-05, + "loss": 0.844, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8183677196502686, + "eval_runtime": 6.5976, + "eval_samples_per_second": 546.867, + "eval_steps_per_second": 8.64, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001004496418226722, + "loss": 0.8089, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9118625277161863, + "eval_loss": 0.8146660923957825, + "eval_runtime": 6.5954, + "eval_samples_per_second": 547.049, + "eval_steps_per_second": 8.642, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00012556205227834025, + "loss": 0.7938, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001506744627340083, + "loss": 0.7832, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9010532150776053, + "eval_loss": 0.8461769223213196, + "eval_runtime": 6.7266, + "eval_samples_per_second": 536.376, + "eval_steps_per_second": 8.474, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00017578687318967637, + "loss": 0.7787, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00018044553586136762, + "loss": 0.7736, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8056672215461731, + "eval_runtime": 6.7995, + "eval_samples_per_second": 530.625, + "eval_steps_per_second": 8.383, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0001797624790257522, + "loss": 0.7671, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8108975291252136, + "eval_runtime": 6.5107, + "eval_samples_per_second": 554.167, + "eval_steps_per_second": 8.755, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0001785612919870289, + "loss": 0.7539, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00017684893216004548, + "loss": 0.7488, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8996674057649667, + "eval_loss": 0.8292457461357117, + "eval_runtime": 6.8483, + "eval_samples_per_second": 526.848, + "eval_steps_per_second": 8.323, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0001746353177318127, + "loss": 0.7517, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00017193327021421326, + "loss": 0.7391, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8558758314855875, + "eval_loss": 0.8991044163703918, + "eval_runtime": 6.9421, + "eval_samples_per_second": 519.727, + "eval_steps_per_second": 8.211, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00016875844018031554, + "loss": 0.7345, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9082594235033259, + "eval_loss": 0.817716658115387, + "eval_runtime": 6.6813, + "eval_samples_per_second": 540.018, + "eval_steps_per_second": 8.531, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00016512921661443662, + "loss": 0.7361, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0001610666204010093, + "loss": 0.7289, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8142815232276917, + "eval_runtime": 7.0796, + "eval_samples_per_second": 509.634, + "eval_steps_per_second": 8.051, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0001565941825691779, + "loss": 0.728, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8124584555625916, + "eval_runtime": 6.8821, + "eval_samples_per_second": 524.256, + "eval_steps_per_second": 8.282, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.000151737807998344, + "loss": 0.7213, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0001465256253740963, + "loss": 0.7234, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8095098733901978, + "eval_runtime": 6.6298, + "eval_samples_per_second": 544.214, + "eval_steps_per_second": 8.598, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00014098782426359658, + "loss": 0.721, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00013515648025409988, + "loss": 0.7206, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8057302236557007, + "eval_runtime": 6.8234, + "eval_samples_per_second": 528.772, + "eval_steps_per_second": 8.354, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00012906536916742773, + "loss": 0.7131, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8245612978935242, + "eval_runtime": 7.0679, + "eval_samples_per_second": 510.477, + "eval_steps_per_second": 8.065, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00012274977142648488, + "loss": 0.7166, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0001162462677069521, + "loss": 0.7048, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8202921152114868, + "eval_runtime": 7.1107, + "eval_samples_per_second": 507.405, + "eval_steps_per_second": 8.016, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00010959252705776367, + "loss": 0.7094, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00010282708871760063, + "loss": 0.7072, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8158580660820007, + "eval_runtime": 6.9318, + "eval_samples_per_second": 520.497, + "eval_steps_per_second": 8.223, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 9.598913889114406e-05, + "loss": 0.7072, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8110833764076233, + "eval_runtime": 7.1738, + "eval_samples_per_second": 502.943, + "eval_steps_per_second": 7.946, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 8.911828377802581e-05, + "loss": 0.6997, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 8.22543201691203e-05, + "loss": 0.7007, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8140919208526611, + "eval_runtime": 7.0187, + "eval_samples_per_second": 514.056, + "eval_steps_per_second": 8.121, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 7.543700493890888e-05, + "loss": 0.6994, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 6.87058247690453e-05, + "loss": 0.7014, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8064132928848267, + "eval_runtime": 6.8728, + "eval_samples_per_second": 524.967, + "eval_steps_per_second": 8.294, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 6.209976743690912e-05, + "loss": 0.7013, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8034742474555969, + "eval_runtime": 6.9073, + "eval_samples_per_second": 522.344, + "eval_steps_per_second": 8.252, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 5.5657095993870485e-05, + "loss": 0.6923, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 4.941512714125255e-05, + "loss": 0.6953, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8065707683563232, + "eval_runtime": 6.9454, + "eval_samples_per_second": 519.477, + "eval_steps_per_second": 8.207, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 4.34100150876628e-05, + "loss": 0.6926, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8070576786994934, + "eval_runtime": 6.6013, + "eval_samples_per_second": 546.556, + "eval_steps_per_second": 8.635, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 3.7676542139617436e-05, + "loss": 0.6921, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 3.224791723838218e-05, + "loss": 0.6919, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.8056824803352356, + "eval_runtime": 6.5939, + "eval_samples_per_second": 547.175, + "eval_steps_per_second": 8.644, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 2.7155583609927143e-05, + "loss": 0.6922, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 2.2429036642109738e-05, + "loss": 0.6884, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.8054395318031311, + "eval_runtime": 6.9237, + "eval_samples_per_second": 521.112, + "eval_steps_per_second": 8.233, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.809565304396241e-05, + "loss": 0.6881, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9207317073170732, + "eval_loss": 0.8054936528205872, + "eval_runtime": 6.97, + "eval_samples_per_second": 517.65, + "eval_steps_per_second": 8.178, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.4180532276613952e-05, + "loss": 0.6892, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 1.0706351174295419e-05, + "loss": 0.6861, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.8038754463195801, + "eval_runtime": 6.8365, + "eval_samples_per_second": 527.757, + "eval_steps_per_second": 8.338, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 7.693232597482281e-06, + "loss": 0.6869, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 5.1586288789492215e-06, + "loss": 0.6908, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9226718403547672, + "eval_loss": 0.7974593639373779, + "eval_runtime": 6.9875, + "eval_samples_per_second": 516.348, + "eval_steps_per_second": 8.157, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 3.1172207378316624e-06, + "loss": 0.6891, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9210088691796009, + "eval_loss": 0.8015906810760498, + "eval_runtime": 6.7703, + "eval_samples_per_second": 532.919, + "eval_steps_per_second": 8.419, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.580832247195227e-06, + "loss": 0.6834, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 5.583623476313033e-07, + "loss": 0.685, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9232261640798226, + "eval_loss": 0.8011350035667419, + "eval_runtime": 6.7049, + "eval_samples_per_second": 538.115, + "eval_steps_per_second": 8.501, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00018061618289268944, + "metric": "eval/loss", + "warmup_ratio": 0.14829267996418982 + } +} diff --git a/run-s0sh2ul4/checkpoint-1232/training_args.bin b/run-s0sh2ul4/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dba36cde2a70b85b1c2448c5729a52b796eb1efd --- /dev/null +++ b/run-s0sh2ul4/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01219a46ba0393b6d15e2b392662adc7b0a2405a27467b563597aab335d077ff +size 4792 diff --git a/run-s0sh2ul4/checkpoint-1260/model.safetensors b/run-s0sh2ul4/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26701aa1ef08dec4abf73a4585126f781964e24f --- /dev/null +++ b/run-s0sh2ul4/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc69d8ac85de276b66a191218fcb46d30dd87f182e857208788067b7cc08b45 +size 198025308 diff --git a/run-s0sh2ul4/checkpoint-1260/optimizer.pt b/run-s0sh2ul4/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7f972c5d45f13115c1db35aedab54d2317a8e5a --- /dev/null +++ b/run-s0sh2ul4/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f832196d5d06ed32fc3e4020134bf66f729bc9de3b74d0e86634d11127248800 +size 395900602 diff --git a/run-s0sh2ul4/checkpoint-1260/rng_state.pth b/run-s0sh2ul4/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-s0sh2ul4/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-s0sh2ul4/checkpoint-1260/scheduler.pt b/run-s0sh2ul4/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa389d17a637e0edf506aed315f37c9a189c69b9 --- /dev/null +++ b/run-s0sh2ul4/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da649561c98a9ca45052cc34f61575f70f4f2ace6b4eb659c89970b621b586c8 +size 1064 diff --git a/run-s0sh2ul4/checkpoint-1260/trainer_state.json b/run-s0sh2ul4/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..315bc2c68847c180bdb7b9d9c4fb017202e5e009 --- /dev/null +++ b/run-s0sh2ul4/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9257206208425721, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-s0sh2ul4/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.511241045566805e-05, + "loss": 1.3441, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.854490022172949, + "eval_loss": 0.9356087446212769, + "eval_runtime": 6.7168, + "eval_samples_per_second": 537.158, + "eval_steps_per_second": 8.486, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 5.02248209113361e-05, + "loss": 0.938, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 7.533723136700416e-05, + "loss": 0.844, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8183677196502686, + "eval_runtime": 6.5976, + "eval_samples_per_second": 546.867, + "eval_steps_per_second": 8.64, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001004496418226722, + "loss": 0.8089, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9118625277161863, + "eval_loss": 0.8146660923957825, + "eval_runtime": 6.5954, + "eval_samples_per_second": 547.049, + "eval_steps_per_second": 8.642, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00012556205227834025, + "loss": 0.7938, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001506744627340083, + "loss": 0.7832, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9010532150776053, + "eval_loss": 0.8461769223213196, + "eval_runtime": 6.7266, + "eval_samples_per_second": 536.376, + "eval_steps_per_second": 8.474, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00017578687318967637, + "loss": 0.7787, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00018044553586136762, + "loss": 0.7736, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8056672215461731, + "eval_runtime": 6.7995, + "eval_samples_per_second": 530.625, + "eval_steps_per_second": 8.383, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0001797624790257522, + "loss": 0.7671, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8108975291252136, + "eval_runtime": 6.5107, + "eval_samples_per_second": 554.167, + "eval_steps_per_second": 8.755, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0001785612919870289, + "loss": 0.7539, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00017684893216004548, + "loss": 0.7488, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8996674057649667, + "eval_loss": 0.8292457461357117, + "eval_runtime": 6.8483, + "eval_samples_per_second": 526.848, + "eval_steps_per_second": 8.323, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0001746353177318127, + "loss": 0.7517, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00017193327021421326, + "loss": 0.7391, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8558758314855875, + "eval_loss": 0.8991044163703918, + "eval_runtime": 6.9421, + "eval_samples_per_second": 519.727, + "eval_steps_per_second": 8.211, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00016875844018031554, + "loss": 0.7345, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9082594235033259, + "eval_loss": 0.817716658115387, + "eval_runtime": 6.6813, + "eval_samples_per_second": 540.018, + "eval_steps_per_second": 8.531, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00016512921661443662, + "loss": 0.7361, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0001610666204010093, + "loss": 0.7289, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9129711751662971, + "eval_loss": 0.8142815232276917, + "eval_runtime": 7.0796, + "eval_samples_per_second": 509.634, + "eval_steps_per_second": 8.051, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0001565941825691779, + "loss": 0.728, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8124584555625916, + "eval_runtime": 6.8821, + "eval_samples_per_second": 524.256, + "eval_steps_per_second": 8.282, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.000151737807998344, + "loss": 0.7213, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0001465256253740963, + "loss": 0.7234, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8095098733901978, + "eval_runtime": 6.6298, + "eval_samples_per_second": 544.214, + "eval_steps_per_second": 8.598, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00014098782426359658, + "loss": 0.721, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00013515648025409988, + "loss": 0.7206, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8057302236557007, + "eval_runtime": 6.8234, + "eval_samples_per_second": 528.772, + "eval_steps_per_second": 8.354, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00012906536916742773, + "loss": 0.7131, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8245612978935242, + "eval_runtime": 7.0679, + "eval_samples_per_second": 510.477, + "eval_steps_per_second": 8.065, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00012274977142648488, + "loss": 0.7166, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0001162462677069521, + "loss": 0.7048, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8202921152114868, + "eval_runtime": 7.1107, + "eval_samples_per_second": 507.405, + "eval_steps_per_second": 8.016, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00010959252705776367, + "loss": 0.7094, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00010282708871760063, + "loss": 0.7072, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8158580660820007, + "eval_runtime": 6.9318, + "eval_samples_per_second": 520.497, + "eval_steps_per_second": 8.223, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 9.598913889114406e-05, + "loss": 0.7072, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.8110833764076233, + "eval_runtime": 7.1738, + "eval_samples_per_second": 502.943, + "eval_steps_per_second": 7.946, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 8.911828377802581e-05, + "loss": 0.6997, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 8.22543201691203e-05, + "loss": 0.7007, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8140919208526611, + "eval_runtime": 7.0187, + "eval_samples_per_second": 514.056, + "eval_steps_per_second": 8.121, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 7.543700493890888e-05, + "loss": 0.6994, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 6.87058247690453e-05, + "loss": 0.7014, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9146341463414634, + "eval_loss": 0.8064132928848267, + "eval_runtime": 6.8728, + "eval_samples_per_second": 524.967, + "eval_steps_per_second": 8.294, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 6.209976743690912e-05, + "loss": 0.7013, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8034742474555969, + "eval_runtime": 6.9073, + "eval_samples_per_second": 522.344, + "eval_steps_per_second": 8.252, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 5.5657095993870485e-05, + "loss": 0.6923, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 4.941512714125255e-05, + "loss": 0.6953, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8065707683563232, + "eval_runtime": 6.9454, + "eval_samples_per_second": 519.477, + "eval_steps_per_second": 8.207, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 4.34100150876628e-05, + "loss": 0.6926, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8070576786994934, + "eval_runtime": 6.6013, + "eval_samples_per_second": 546.556, + "eval_steps_per_second": 8.635, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 3.7676542139617436e-05, + "loss": 0.6921, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 3.224791723838218e-05, + "loss": 0.6919, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.8056824803352356, + "eval_runtime": 6.5939, + "eval_samples_per_second": 547.175, + "eval_steps_per_second": 8.644, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 2.7155583609927143e-05, + "loss": 0.6922, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 2.2429036642109738e-05, + "loss": 0.6884, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.8054395318031311, + "eval_runtime": 6.9237, + "eval_samples_per_second": 521.112, + "eval_steps_per_second": 8.233, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.809565304396241e-05, + "loss": 0.6881, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9207317073170732, + "eval_loss": 0.8054936528205872, + "eval_runtime": 6.97, + "eval_samples_per_second": 517.65, + "eval_steps_per_second": 8.178, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.4180532276613952e-05, + "loss": 0.6892, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 1.0706351174295419e-05, + "loss": 0.6861, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.8038754463195801, + "eval_runtime": 6.8365, + "eval_samples_per_second": 527.757, + "eval_steps_per_second": 8.338, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 7.693232597482281e-06, + "loss": 0.6869, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 5.1586288789492215e-06, + "loss": 0.6908, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9226718403547672, + "eval_loss": 0.7974593639373779, + "eval_runtime": 6.9875, + "eval_samples_per_second": 516.348, + "eval_steps_per_second": 8.157, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 3.1172207378316624e-06, + "loss": 0.6891, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9210088691796009, + "eval_loss": 0.8015906810760498, + "eval_runtime": 6.7703, + "eval_samples_per_second": 532.919, + "eval_steps_per_second": 8.419, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.580832247195227e-06, + "loss": 0.6834, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 5.583623476313033e-07, + "loss": 0.685, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9232261640798226, + "eval_loss": 0.8011350035667419, + "eval_runtime": 6.7049, + "eval_samples_per_second": 538.115, + "eval_steps_per_second": 8.501, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 5.573330355941265e-08, + "loss": 0.6885, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9257206208425721, + "eval_loss": 0.7937456965446472, + "eval_runtime": 7.0045, + "eval_samples_per_second": 515.1, + "eval_steps_per_second": 8.138, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00018061618289268944, + "metric": "eval/loss", + "warmup_ratio": 0.14829267996418982 + } +} diff --git a/run-s0sh2ul4/checkpoint-1260/training_args.bin b/run-s0sh2ul4/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dba36cde2a70b85b1c2448c5729a52b796eb1efd --- /dev/null +++ b/run-s0sh2ul4/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01219a46ba0393b6d15e2b392662adc7b0a2405a27467b563597aab335d077ff +size 4792 diff --git a/run-s8xkfejt/checkpoint-531/model.safetensors b/run-s8xkfejt/checkpoint-531/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d3133f261887fdb2623a121d2c27f58ab8c50df --- /dev/null +++ b/run-s8xkfejt/checkpoint-531/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72e42f23b8294d33b63fc1b451ef5831a62956303a98544833a299131a62d9f3 +size 198025308 diff --git a/run-s8xkfejt/checkpoint-531/optimizer.pt b/run-s8xkfejt/checkpoint-531/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b16cccba20f42fe6281b06e182173228b53f6c55 --- /dev/null +++ b/run-s8xkfejt/checkpoint-531/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ecd50da7bda9180ade31a1c6bcf91803c2281355063d0f74430b98140f5bd5e +size 395900602 diff --git a/run-s8xkfejt/checkpoint-531/rng_state.pth b/run-s8xkfejt/checkpoint-531/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9f50e97f6b8f8d895fe76d5cec6c2f434c3883a4 --- /dev/null +++ b/run-s8xkfejt/checkpoint-531/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7ebc248f8c573102043dc64b4a3df9b7043de65474577f599703b3d52b79074 +size 14244 diff --git a/run-s8xkfejt/checkpoint-531/scheduler.pt b/run-s8xkfejt/checkpoint-531/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..625121c91998229005615eac3a648c65361f7935 --- /dev/null +++ b/run-s8xkfejt/checkpoint-531/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df18cac7cc163a1118f07c5733ca10c03ba329fdc9ae9670f18adeb66dafeac5 +size 1064 diff --git a/run-s8xkfejt/checkpoint-531/trainer_state.json b/run-s8xkfejt/checkpoint-531/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0878753ca7560912df3e7c64d9cb0d5d0ac20df5 --- /dev/null +++ b/run-s8xkfejt/checkpoint-531/trainer_state.json @@ -0,0 +1,568 @@ +{ + "best_metric": 0.9144598489202457, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-s8xkfejt/checkpoint-531", + "epoch": 24.988235294117647, + "eval_steps": 500, + "global_step": 531, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 8.285650814138023e-05, + "loss": 1.2836, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.1190669536590576, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.2383, + "eval_samples_per_second": 437.954, + "eval_steps_per_second": 3.52, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00016571301628276046, + "loss": 0.936, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002485695244241407, + "loss": 0.8217, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8985587583148559, + "eval_f1": 0.8891634979717559, + "eval_loss": 0.8351738452911377, + "eval_precision": 0.8915397370263759, + "eval_recall": 0.8985587583148559, + "eval_runtime": 8.2783, + "eval_samples_per_second": 435.837, + "eval_steps_per_second": 3.503, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.0003314260325655209, + "loss": 0.7974, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8708425720620843, + "eval_f1": 0.8779037113840898, + "eval_loss": 0.8800716400146484, + "eval_precision": 0.8924362515910248, + "eval_recall": 0.8708425720620843, + "eval_runtime": 7.8716, + "eval_samples_per_second": 458.357, + "eval_steps_per_second": 3.684, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.00041428254070690116, + "loss": 0.7908, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004971390488482814, + "loss": 0.7801, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6241685144124168, + "eval_f1": 0.6772739482747236, + "eval_loss": 1.22988760471344, + "eval_precision": 0.8827423462589504, + "eval_recall": 0.6241685144124168, + "eval_runtime": 8.0023, + "eval_samples_per_second": 450.871, + "eval_steps_per_second": 3.624, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0005799955569896616, + "loss": 0.7932, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0006050671761852134, + "loss": 0.7751, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8486696230598669, + "eval_f1": 0.851372752168712, + "eval_loss": 0.9210855960845947, + "eval_precision": 0.8691444060100726, + "eval_recall": 0.8486696230598669, + "eval_runtime": 7.9019, + "eval_samples_per_second": 456.598, + "eval_steps_per_second": 3.67, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0006029670815939109, + "loss": 0.7813, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8697339246119734, + "eval_f1": 0.8488086097285563, + "eval_loss": 0.8821918368339539, + "eval_precision": 0.8662121525504849, + "eval_recall": 0.8697339246119734, + "eval_runtime": 7.7311, + "eval_samples_per_second": 466.687, + "eval_steps_per_second": 3.751, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0005991183055609979, + "loss": 0.7754, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0005935432657637392, + "loss": 0.7628, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8910753880266076, + "eval_f1": 0.8746018336737065, + "eval_loss": 0.8523077368736267, + "eval_precision": 0.8766437069671519, + "eval_recall": 0.8910753880266076, + "eval_runtime": 8.0762, + "eval_samples_per_second": 446.746, + "eval_steps_per_second": 3.591, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0005862744347185522, + "loss": 0.7675, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0005773541506407733, + "loss": 0.7597, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8827605321507761, + "eval_f1": 0.8668408993981753, + "eval_loss": 0.8588318824768066, + "eval_precision": 0.8679678777628531, + "eval_recall": 0.8827605321507761, + "eval_runtime": 7.4047, + "eval_samples_per_second": 487.258, + "eval_steps_per_second": 3.916, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00056683437084043, + "loss": 0.7544, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8824833702882483, + "eval_f1": 0.8586673290545209, + "eval_loss": 0.8741574287414551, + "eval_precision": 0.8724564776278178, + "eval_recall": 0.8824833702882483, + "eval_runtime": 8.0514, + "eval_samples_per_second": 448.12, + "eval_steps_per_second": 3.602, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0005547763690903908, + "loss": 0.7556, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0005412503787296138, + "loss": 0.7542, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8894124168514412, + "eval_f1": 0.8849482914115554, + "eval_loss": 0.8624522089958191, + "eval_precision": 0.8942201988692304, + "eval_recall": 0.8894124168514412, + "eval_runtime": 7.9475, + "eval_samples_per_second": 453.977, + "eval_steps_per_second": 3.649, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0005263351835802814, + "loss": 0.7573, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8780487804878049, + "eval_f1": 0.8819671279669734, + "eval_loss": 0.8696417808532715, + "eval_precision": 0.8908389107232549, + "eval_recall": 0.8780487804878049, + "eval_runtime": 7.7861, + "eval_samples_per_second": 463.388, + "eval_steps_per_second": 3.725, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0005101176590615765, + "loss": 0.7562, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0004926922661729414, + "loss": 0.7431, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8597560975609756, + "eval_f1": 0.8609433123700134, + "eval_loss": 0.9089389443397522, + "eval_precision": 0.8806718073269544, + "eval_recall": 0.8597560975609756, + "eval_runtime": 8.2089, + "eval_samples_per_second": 439.521, + "eval_steps_per_second": 3.533, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0004741605012941739, + "loss": 0.7383, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00045463030500707177, + "loss": 0.7394, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.9006352933631326, + "eval_loss": 0.8237543106079102, + "eval_precision": 0.8973657592457057, + "eval_recall": 0.9065964523281597, + "eval_runtime": 7.9781, + "eval_samples_per_second": 452.236, + "eval_steps_per_second": 3.635, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0004342154333820171, + "loss": 0.7284, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8977272727272727, + "eval_f1": 0.8873582365683673, + "eval_loss": 0.8422288298606873, + "eval_precision": 0.8937265755135332, + "eval_recall": 0.8977272727272727, + "eval_runtime": 8.0916, + "eval_samples_per_second": 445.895, + "eval_steps_per_second": 3.584, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0004130347953915207, + "loss": 0.7264, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003912117603100418, + "loss": 0.7231, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8999445676274944, + "eval_f1": 0.8959019545241117, + "eval_loss": 0.8254969716072083, + "eval_precision": 0.8928208780348612, + "eval_recall": 0.8999445676274944, + "eval_runtime": 8.0417, + "eval_samples_per_second": 448.66, + "eval_steps_per_second": 3.606, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0003688734391342231, + "loss": 0.7158, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00034614994420899704, + "loss": 0.7229, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9029933481152993, + "eval_f1": 0.8932144996931456, + "eval_loss": 0.8316783308982849, + "eval_precision": 0.8920823033906993, + "eval_recall": 0.9029933481152993, + "eval_runtime": 7.6367, + "eval_samples_per_second": 472.456, + "eval_steps_per_second": 3.797, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0003231736313719621, + "loss": 0.7151, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8883037694013304, + "eval_f1": 0.889649094222398, + "eval_loss": 0.8442788124084473, + "eval_precision": 0.8940096508255989, + "eval_recall": 0.8883037694013304, + "eval_runtime": 7.9846, + "eval_samples_per_second": 451.868, + "eval_steps_per_second": 3.632, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0003000783290302574, + "loss": 0.7185, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.0002769985586602708, + "loss": 0.7072, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9101995565410199, + "eval_f1": 0.9025996814801257, + "eval_loss": 0.8200421333312988, + "eval_precision": 0.8996706075261323, + "eval_recall": 0.9101995565410199, + "eval_runtime": 7.2654, + "eval_samples_per_second": 496.598, + "eval_steps_per_second": 3.991, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.0002540687512704785, + "loss": 0.7038, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00023142246439122692, + "loss": 0.7076, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.905899198688688, + "eval_loss": 0.8195255398750305, + "eval_precision": 0.9033767893838173, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.4442, + "eval_samples_per_second": 484.671, + "eval_steps_per_second": 3.896, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.00020919160415219684, + "loss": 0.6989, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8996674057649667, + "eval_f1": 0.8984365441692502, + "eval_loss": 0.836618185043335, + "eval_precision": 0.8977538545127759, + "eval_recall": 0.8996674057649667, + "eval_runtime": 7.9436, + "eval_samples_per_second": 454.199, + "eval_steps_per_second": 3.651, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00018750565697865665, + "loss": 0.7006, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.00016649093538158785, + "loss": 0.6977, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9057649667405765, + "eval_f1": 0.9024834091449649, + "eval_loss": 0.8188351988792419, + "eval_precision": 0.9003965975597888, + "eval_recall": 0.9057649667405765, + "eval_runtime": 8.0299, + "eval_samples_per_second": 449.32, + "eval_steps_per_second": 3.611, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.00014626984223466944, + "loss": 0.6959, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9084484709144038, + "eval_loss": 0.8180081844329834, + "eval_precision": 0.9054480556555874, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.9431, + "eval_samples_per_second": 454.233, + "eval_steps_per_second": 3.651, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00012696015782343546, + "loss": 0.6936, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 0.0001086743538192739, + "loss": 0.6944, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.91425701615335, + "eval_loss": 0.8058905005455017, + "eval_precision": 0.913306231658411, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.1728, + "eval_samples_per_second": 441.467, + "eval_steps_per_second": 3.548, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 9.151893817411278e-05, + "loss": 0.6919, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 7.559383475153659e-05, + "loss": 0.6916, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9103158933356634, + "eval_loss": 0.8169350028038025, + "eval_precision": 0.9088985274664754, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.974, + "eval_samples_per_second": 452.468, + "eval_steps_per_second": 3.637, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 6.099180130774787e-05, + "loss": 0.6926, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9144598489202457, + "eval_loss": 0.8044824600219727, + "eval_precision": 0.9119132660654642, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.7681, + "eval_samples_per_second": 464.464, + "eval_steps_per_second": 3.733, + "step": 531 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.193882815250863, + "learning_rate": 0.0006054898671870093, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-s8xkfejt/checkpoint-531/training_args.bin b/run-s8xkfejt/checkpoint-531/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b498450ef3fd5676c9af9a714cde94802d485da2 --- /dev/null +++ b/run-s8xkfejt/checkpoint-531/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7411c068e0bea8cc9d85bbfef2f23aaa309ccd80dcbd8238702e34c93eb48e8b +size 4792 diff --git a/run-s8xkfejt/checkpoint-630/model.safetensors b/run-s8xkfejt/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e093f9b63da052240f54234858e8249dafd0d6f3 --- /dev/null +++ b/run-s8xkfejt/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df9f3ed194a1bf9a800b2b12fbea16c51f38b227bf26199956f40e0a4d7e901f +size 198025308 diff --git a/run-s8xkfejt/checkpoint-630/optimizer.pt b/run-s8xkfejt/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..715b4b4387cabf29312cea6ddce081e546754293 --- /dev/null +++ b/run-s8xkfejt/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee44cbe0ea10d896ae7e46b5455526e4ec9782026006f78da8088a10bda3f5e0 +size 395900602 diff --git a/run-s8xkfejt/checkpoint-630/rng_state.pth b/run-s8xkfejt/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-s8xkfejt/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-s8xkfejt/checkpoint-630/scheduler.pt b/run-s8xkfejt/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e54a1a5eccf6d229ce06f444974929cd97d7b49a --- /dev/null +++ b/run-s8xkfejt/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a044aacbca74098f07ed56a2022553d2936d571626f14f90bc4b50621e498bc +size 1064 diff --git a/run-s8xkfejt/checkpoint-630/trainer_state.json b/run-s8xkfejt/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..75d52f57a0e1a2894f11f6809dc5a3e698db04fa --- /dev/null +++ b/run-s8xkfejt/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9144598489202457, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-s8xkfejt/checkpoint-531", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 8.285650814138023e-05, + "loss": 1.2836, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 1.1190669536590576, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.2383, + "eval_samples_per_second": 437.954, + "eval_steps_per_second": 3.52, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00016571301628276046, + "loss": 0.936, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002485695244241407, + "loss": 0.8217, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8985587583148559, + "eval_f1": 0.8891634979717559, + "eval_loss": 0.8351738452911377, + "eval_precision": 0.8915397370263759, + "eval_recall": 0.8985587583148559, + "eval_runtime": 8.2783, + "eval_samples_per_second": 435.837, + "eval_steps_per_second": 3.503, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.0003314260325655209, + "loss": 0.7974, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8708425720620843, + "eval_f1": 0.8779037113840898, + "eval_loss": 0.8800716400146484, + "eval_precision": 0.8924362515910248, + "eval_recall": 0.8708425720620843, + "eval_runtime": 7.8716, + "eval_samples_per_second": 458.357, + "eval_steps_per_second": 3.684, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.00041428254070690116, + "loss": 0.7908, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004971390488482814, + "loss": 0.7801, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6241685144124168, + "eval_f1": 0.6772739482747236, + "eval_loss": 1.22988760471344, + "eval_precision": 0.8827423462589504, + "eval_recall": 0.6241685144124168, + "eval_runtime": 8.0023, + "eval_samples_per_second": 450.871, + "eval_steps_per_second": 3.624, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0005799955569896616, + "loss": 0.7932, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0006050671761852134, + "loss": 0.7751, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8486696230598669, + "eval_f1": 0.851372752168712, + "eval_loss": 0.9210855960845947, + "eval_precision": 0.8691444060100726, + "eval_recall": 0.8486696230598669, + "eval_runtime": 7.9019, + "eval_samples_per_second": 456.598, + "eval_steps_per_second": 3.67, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0006029670815939109, + "loss": 0.7813, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8697339246119734, + "eval_f1": 0.8488086097285563, + "eval_loss": 0.8821918368339539, + "eval_precision": 0.8662121525504849, + "eval_recall": 0.8697339246119734, + "eval_runtime": 7.7311, + "eval_samples_per_second": 466.687, + "eval_steps_per_second": 3.751, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0005991183055609979, + "loss": 0.7754, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0005935432657637392, + "loss": 0.7628, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8910753880266076, + "eval_f1": 0.8746018336737065, + "eval_loss": 0.8523077368736267, + "eval_precision": 0.8766437069671519, + "eval_recall": 0.8910753880266076, + "eval_runtime": 8.0762, + "eval_samples_per_second": 446.746, + "eval_steps_per_second": 3.591, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0005862744347185522, + "loss": 0.7675, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0005773541506407733, + "loss": 0.7597, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8827605321507761, + "eval_f1": 0.8668408993981753, + "eval_loss": 0.8588318824768066, + "eval_precision": 0.8679678777628531, + "eval_recall": 0.8827605321507761, + "eval_runtime": 7.4047, + "eval_samples_per_second": 487.258, + "eval_steps_per_second": 3.916, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00056683437084043, + "loss": 0.7544, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8824833702882483, + "eval_f1": 0.8586673290545209, + "eval_loss": 0.8741574287414551, + "eval_precision": 0.8724564776278178, + "eval_recall": 0.8824833702882483, + "eval_runtime": 8.0514, + "eval_samples_per_second": 448.12, + "eval_steps_per_second": 3.602, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0005547763690903908, + "loss": 0.7556, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0005412503787296138, + "loss": 0.7542, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8894124168514412, + "eval_f1": 0.8849482914115554, + "eval_loss": 0.8624522089958191, + "eval_precision": 0.8942201988692304, + "eval_recall": 0.8894124168514412, + "eval_runtime": 7.9475, + "eval_samples_per_second": 453.977, + "eval_steps_per_second": 3.649, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0005263351835802814, + "loss": 0.7573, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8780487804878049, + "eval_f1": 0.8819671279669734, + "eval_loss": 0.8696417808532715, + "eval_precision": 0.8908389107232549, + "eval_recall": 0.8780487804878049, + "eval_runtime": 7.7861, + "eval_samples_per_second": 463.388, + "eval_steps_per_second": 3.725, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0005101176590615765, + "loss": 0.7562, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0004926922661729414, + "loss": 0.7431, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8597560975609756, + "eval_f1": 0.8609433123700134, + "eval_loss": 0.9089389443397522, + "eval_precision": 0.8806718073269544, + "eval_recall": 0.8597560975609756, + "eval_runtime": 8.2089, + "eval_samples_per_second": 439.521, + "eval_steps_per_second": 3.533, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0004741605012941739, + "loss": 0.7383, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00045463030500707177, + "loss": 0.7394, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.9006352933631326, + "eval_loss": 0.8237543106079102, + "eval_precision": 0.8973657592457057, + "eval_recall": 0.9065964523281597, + "eval_runtime": 7.9781, + "eval_samples_per_second": 452.236, + "eval_steps_per_second": 3.635, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0004342154333820171, + "loss": 0.7284, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8977272727272727, + "eval_f1": 0.8873582365683673, + "eval_loss": 0.8422288298606873, + "eval_precision": 0.8937265755135332, + "eval_recall": 0.8977272727272727, + "eval_runtime": 8.0916, + "eval_samples_per_second": 445.895, + "eval_steps_per_second": 3.584, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0004130347953915207, + "loss": 0.7264, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003912117603100418, + "loss": 0.7231, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8999445676274944, + "eval_f1": 0.8959019545241117, + "eval_loss": 0.8254969716072083, + "eval_precision": 0.8928208780348612, + "eval_recall": 0.8999445676274944, + "eval_runtime": 8.0417, + "eval_samples_per_second": 448.66, + "eval_steps_per_second": 3.606, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0003688734391342231, + "loss": 0.7158, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00034614994420899704, + "loss": 0.7229, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9029933481152993, + "eval_f1": 0.8932144996931456, + "eval_loss": 0.8316783308982849, + "eval_precision": 0.8920823033906993, + "eval_recall": 0.9029933481152993, + "eval_runtime": 7.6367, + "eval_samples_per_second": 472.456, + "eval_steps_per_second": 3.797, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0003231736313719621, + "loss": 0.7151, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8883037694013304, + "eval_f1": 0.889649094222398, + "eval_loss": 0.8442788124084473, + "eval_precision": 0.8940096508255989, + "eval_recall": 0.8883037694013304, + "eval_runtime": 7.9846, + "eval_samples_per_second": 451.868, + "eval_steps_per_second": 3.632, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0003000783290302574, + "loss": 0.7185, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.0002769985586602708, + "loss": 0.7072, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9101995565410199, + "eval_f1": 0.9025996814801257, + "eval_loss": 0.8200421333312988, + "eval_precision": 0.8996706075261323, + "eval_recall": 0.9101995565410199, + "eval_runtime": 7.2654, + "eval_samples_per_second": 496.598, + "eval_steps_per_second": 3.991, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.0002540687512704785, + "loss": 0.7038, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00023142246439122692, + "loss": 0.7076, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.905899198688688, + "eval_loss": 0.8195255398750305, + "eval_precision": 0.9033767893838173, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.4442, + "eval_samples_per_second": 484.671, + "eval_steps_per_second": 3.896, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.00020919160415219684, + "loss": 0.6989, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8996674057649667, + "eval_f1": 0.8984365441692502, + "eval_loss": 0.836618185043335, + "eval_precision": 0.8977538545127759, + "eval_recall": 0.8996674057649667, + "eval_runtime": 7.9436, + "eval_samples_per_second": 454.199, + "eval_steps_per_second": 3.651, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.00018750565697865665, + "loss": 0.7006, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.00016649093538158785, + "loss": 0.6977, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9057649667405765, + "eval_f1": 0.9024834091449649, + "eval_loss": 0.8188351988792419, + "eval_precision": 0.9003965975597888, + "eval_recall": 0.9057649667405765, + "eval_runtime": 8.0299, + "eval_samples_per_second": 449.32, + "eval_steps_per_second": 3.611, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.00014626984223466944, + "loss": 0.6959, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9084484709144038, + "eval_loss": 0.8180081844329834, + "eval_precision": 0.9054480556555874, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.9431, + "eval_samples_per_second": 454.233, + "eval_steps_per_second": 3.651, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00012696015782343546, + "loss": 0.6936, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 0.0001086743538192739, + "loss": 0.6944, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.91425701615335, + "eval_loss": 0.8058905005455017, + "eval_precision": 0.913306231658411, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.1728, + "eval_samples_per_second": 441.467, + "eval_steps_per_second": 3.548, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 9.151893817411278e-05, + "loss": 0.6919, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 7.559383475153659e-05, + "loss": 0.6916, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9103158933356634, + "eval_loss": 0.8169350028038025, + "eval_precision": 0.9088985274664754, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.974, + "eval_samples_per_second": 452.468, + "eval_steps_per_second": 3.637, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 6.099180130774787e-05, + "loss": 0.6926, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9144598489202457, + "eval_loss": 0.8044824600219727, + "eval_precision": 0.9119132660654642, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.7681, + "eval_samples_per_second": 464.464, + "eval_steps_per_second": 3.733, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 4.779788921241715e-05, + "loss": 0.6844, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 3.608894805634592e-05, + "loss": 0.688, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9128115047357279, + "eval_loss": 0.8096891641616821, + "eval_precision": 0.910816670496659, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.7613, + "eval_samples_per_second": 464.87, + "eval_steps_per_second": 3.736, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 2.5933178031415196e-05, + "loss": 0.6887, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 1.738973269003976e-05, + "loss": 0.6877, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9064494886210999, + "eval_loss": 0.8168781995773315, + "eval_precision": 0.903185036874373, + "eval_recall": 0.9104767184035477, + "eval_runtime": 8.2838, + "eval_samples_per_second": 435.547, + "eval_steps_per_second": 3.501, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.0508374397905805e-05, + "loss": 0.6874, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.910707652251439, + "eval_loss": 0.8078291416168213, + "eval_precision": 0.9076488731439921, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.999, + "eval_samples_per_second": 451.059, + "eval_steps_per_second": 3.625, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 5.329184486849378e-06, + "loss": 0.6901, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.8823297961271867e-06, + "loss": 0.6904, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9137760118354532, + "eval_loss": 0.8095471858978271, + "eval_precision": 0.9114271579061634, + "eval_recall": 0.9171286031042128, + "eval_runtime": 8.2546, + "eval_samples_per_second": 437.09, + "eval_steps_per_second": 3.513, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 1.8788696188933662e-07, + "loss": 0.6833, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9082817420356288, + "eval_loss": 0.8085846900939941, + "eval_precision": 0.9059098387560125, + "eval_recall": 0.9129711751662971, + "eval_runtime": 7.8618, + "eval_samples_per_second": 458.929, + "eval_steps_per_second": 3.689, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.193882815250863, + "learning_rate": 0.0006054898671870093, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-s8xkfejt/checkpoint-630/training_args.bin b/run-s8xkfejt/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b498450ef3fd5676c9af9a714cde94802d485da2 --- /dev/null +++ b/run-s8xkfejt/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7411c068e0bea8cc9d85bbfef2f23aaa309ccd80dcbd8238702e34c93eb48e8b +size 4792 diff --git a/run-syiegpd6/checkpoint-1190/model.safetensors b/run-syiegpd6/checkpoint-1190/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ffe758782a635897bb9bb80a341a9aa0f7b06fb --- /dev/null +++ b/run-syiegpd6/checkpoint-1190/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e526e6d83f287e190a90b0340b95a75b4984fd76bf1911cdbb01884dc65f6de +size 198025308 diff --git a/run-syiegpd6/checkpoint-1190/optimizer.pt b/run-syiegpd6/checkpoint-1190/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..49ca6ffa82a1290f5bbb75be01fd22c19da83a2c --- /dev/null +++ b/run-syiegpd6/checkpoint-1190/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b43a53a576b8469d414477803cdb22f8ac93d9144b05358436935a7b9c73a40b +size 395900602 diff --git a/run-syiegpd6/checkpoint-1190/rng_state.pth b/run-syiegpd6/checkpoint-1190/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fa10329c52a02715f873c9a50812feb7d32c8cd3 --- /dev/null +++ b/run-syiegpd6/checkpoint-1190/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43f5febab37757cc5268c77056c937c9c526090d892464a785cf2004d48e5d85 +size 14244 diff --git a/run-syiegpd6/checkpoint-1190/scheduler.pt b/run-syiegpd6/checkpoint-1190/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5610c37f714386d28ec85f4f7b1ef5c2ebfa6b81 --- /dev/null +++ b/run-syiegpd6/checkpoint-1190/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cc0e66f89a035b0037919474952cd69f30f1425bcc3966476a974ee07c5d2dd +size 1064 diff --git a/run-syiegpd6/checkpoint-1190/trainer_state.json b/run-syiegpd6/checkpoint-1190/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..02a833c766148cc42556eb6eeac381a219f7fc66 --- /dev/null +++ b/run-syiegpd6/checkpoint-1190/trainer_state.json @@ -0,0 +1,549 @@ +{ + "best_metric": 0.9246119733924612, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-syiegpd6/checkpoint-1190", + "epoch": 28.0, + "eval_steps": 500, + "global_step": 1190, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.6782065994840123e-05, + "loss": 1.4012, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8292682926829268, + "eval_loss": 0.9448133707046509, + "eval_runtime": 6.6859, + "eval_samples_per_second": 539.64, + "eval_steps_per_second": 8.525, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 3.3564131989680246e-05, + "loss": 0.98, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 5.034619798452037e-05, + "loss": 0.873, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9049334811529933, + "eval_loss": 0.8445682525634766, + "eval_runtime": 6.7651, + "eval_samples_per_second": 533.329, + "eval_steps_per_second": 8.426, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 6.712826397936049e-05, + "loss": 0.8209, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8063568472862244, + "eval_runtime": 6.6845, + "eval_samples_per_second": 539.755, + "eval_steps_per_second": 8.527, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 8.391032997420061e-05, + "loss": 0.7995, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00010069239596904074, + "loss": 0.7895, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8422191143035889, + "eval_runtime": 6.7027, + "eval_samples_per_second": 538.287, + "eval_steps_per_second": 8.504, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00011747446196388085, + "loss": 0.7789, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00013425652795872098, + "loss": 0.7762, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.7983577251434326, + "eval_runtime": 7.0047, + "eval_samples_per_second": 515.085, + "eval_steps_per_second": 8.137, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00015103859395356107, + "loss": 0.7707, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8985587583148559, + "eval_loss": 0.8174644112586975, + "eval_runtime": 6.818, + "eval_samples_per_second": 529.187, + "eval_steps_per_second": 8.36, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00016782065994840121, + "loss": 0.7624, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00018460272594324133, + "loss": 0.7565, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9113082039911308, + "eval_loss": 0.8123602271080017, + "eval_runtime": 7.1584, + "eval_samples_per_second": 504.021, + "eval_steps_per_second": 7.963, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00020138479193808147, + "loss": 0.7599, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00021816685793292156, + "loss": 0.7519, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8221088647842407, + "eval_runtime": 6.9443, + "eval_samples_per_second": 519.563, + "eval_steps_per_second": 8.208, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0002349489239277617, + "loss": 0.743, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8342572062084257, + "eval_loss": 0.9307757019996643, + "eval_runtime": 6.7109, + "eval_samples_per_second": 537.634, + "eval_steps_per_second": 8.494, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0002517309899226018, + "loss": 0.7554, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00026851305591744196, + "loss": 0.747, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8883037694013304, + "eval_loss": 0.852607011795044, + "eval_runtime": 6.9181, + "eval_samples_per_second": 521.531, + "eval_steps_per_second": 8.239, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0002685748617990294, + "loss": 0.7491, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8259668350219727, + "eval_runtime": 7.1939, + "eval_samples_per_second": 501.537, + "eval_steps_per_second": 7.923, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00026673511858293306, + "loss": 0.7454, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00026365561454264595, + "loss": 0.7389, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9079822616407982, + "eval_loss": 0.8228020668029785, + "eval_runtime": 7.0289, + "eval_samples_per_second": 513.308, + "eval_steps_per_second": 8.109, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0002593652386620386, + "loss": 0.7384, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00025390423917523956, + "loss": 0.7339, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9038248337028825, + "eval_loss": 0.8257579803466797, + "eval_runtime": 7.1, + "eval_samples_per_second": 508.172, + "eval_steps_per_second": 8.028, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0002473238459959062, + "loss": 0.728, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9049334811529933, + "eval_loss": 0.8260374665260315, + "eval_runtime": 6.9642, + "eval_samples_per_second": 518.077, + "eval_steps_per_second": 8.185, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00023968579012680159, + "loss": 0.7391, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00023106172455812085, + "loss": 0.7224, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8655764966740577, + "eval_loss": 0.8903546929359436, + "eval_runtime": 7.1236, + "eval_samples_per_second": 506.487, + "eval_steps_per_second": 8.002, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00022153255208714982, + "loss": 0.7306, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00021118766636501292, + "loss": 0.7205, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9041019955654102, + "eval_loss": 0.8240099549293518, + "eval_runtime": 6.8831, + "eval_samples_per_second": 524.184, + "eval_steps_per_second": 8.281, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0002001241132902898, + "loss": 0.7251, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8179566860198975, + "eval_runtime": 6.4667, + "eval_samples_per_second": 557.939, + "eval_steps_per_second": 8.814, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0001884456806165101, + "loss": 0.7108, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00017626192431396392, + "loss": 0.7159, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8147507309913635, + "eval_runtime": 6.8379, + "eval_samples_per_second": 527.651, + "eval_steps_per_second": 8.336, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00016368714081957823, + "loss": 0.7093, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.0001508392948162346, + "loss": 0.7126, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.8996674057649667, + "eval_loss": 0.8287857174873352, + "eval_runtime": 6.6222, + "eval_samples_per_second": 544.836, + "eval_steps_per_second": 8.607, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0001378389126000853, + "loss": 0.7125, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.807800829410553, + "eval_runtime": 6.4865, + "eval_samples_per_second": 556.231, + "eval_steps_per_second": 8.787, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00012480795141724542, + "loss": 0.7054, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00011186865537667, + "loss": 0.7021, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8086454272270203, + "eval_runtime": 6.8665, + "eval_samples_per_second": 525.451, + "eval_steps_per_second": 8.301, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 9.914240867195727e-05, + "loss": 0.6957, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.815013587474823, + "eval_runtime": 6.8071, + "eval_samples_per_second": 530.034, + "eval_steps_per_second": 8.374, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 8.674859687006062e-05, + "loss": 0.6966, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 7.480348694921928e-05, + "loss": 0.6969, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9215631929046563, + "eval_loss": 0.8047890067100525, + "eval_runtime": 6.6868, + "eval_samples_per_second": 539.572, + "eval_steps_per_second": 8.524, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 6.341913659252787e-05, + "loss": 0.6955, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 5.2702342969117676e-05, + "loss": 0.6939, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.803421139717102, + "eval_runtime": 6.7218, + "eval_samples_per_second": 536.763, + "eval_steps_per_second": 8.48, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 4.275364086448689e-05, + "loss": 0.6894, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8005704283714294, + "eval_runtime": 6.8206, + "eval_samples_per_second": 528.987, + "eval_steps_per_second": 8.357, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 3.366635955856929e-05, + "loss": 0.6895, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.5525747299017654e-05, + "loss": 0.6887, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8079735040664673, + "eval_runtime": 6.8653, + "eval_samples_per_second": 525.543, + "eval_steps_per_second": 8.303, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 1.840817158306065e-05, + "loss": 0.6854, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.2380402750131451e-05, + "loss": 0.692, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.8051853775978088, + "eval_runtime": 6.6356, + "eval_samples_per_second": 543.735, + "eval_steps_per_second": 8.59, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 7.49898760592083e-06, + "loss": 0.6897, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9246119733924612, + "eval_loss": 0.7973034381866455, + "eval_runtime": 6.78, + "eval_samples_per_second": 532.151, + "eval_steps_per_second": 8.407, + "step": 1190 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00026915851999416657, + "metric": "eval/loss", + "warmup_ratio": 0.330612213985626 + } +} diff --git a/run-syiegpd6/checkpoint-1190/training_args.bin b/run-syiegpd6/checkpoint-1190/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8e2f3e00344fd3858ae85ebdb433294e6f5fafb --- /dev/null +++ b/run-syiegpd6/checkpoint-1190/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1fe202d72d38e88746a685486e93d2c032ac2589f11862308b117e3f8103f8b +size 4792 diff --git a/run-syiegpd6/checkpoint-1260/model.safetensors b/run-syiegpd6/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e883c1f4c3a0a30c8d7a5aabca8a6837ccc9ad19 --- /dev/null +++ b/run-syiegpd6/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de5e3bb282694e8f8e13f3ac2a3f080bd0404b4af82d1e47f0dfbdaedf2255e4 +size 198025308 diff --git a/run-syiegpd6/checkpoint-1260/optimizer.pt b/run-syiegpd6/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..62edab1ebf7850bed39db8dd566730a68249d801 --- /dev/null +++ b/run-syiegpd6/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd7b83887cd1a83cb973303363ab5085e27596e5d6772f47ca49afb210e94077 +size 395900602 diff --git a/run-syiegpd6/checkpoint-1260/rng_state.pth b/run-syiegpd6/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-syiegpd6/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-syiegpd6/checkpoint-1260/scheduler.pt b/run-syiegpd6/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea1f43100f354c67ef2ffa5fadfa92a9d89eb84f --- /dev/null +++ b/run-syiegpd6/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:652f4592b2e6c0793443ddcff78ee6c4cdb268287daeee91eb50511e0af7ab05 +size 1064 diff --git a/run-syiegpd6/checkpoint-1260/trainer_state.json b/run-syiegpd6/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7ad43696e7392cc7a1bb22cbeece435d47c1becc --- /dev/null +++ b/run-syiegpd6/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9246119733924612, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-syiegpd6/checkpoint-1190", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.6782065994840123e-05, + "loss": 1.4012, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8292682926829268, + "eval_loss": 0.9448133707046509, + "eval_runtime": 6.6859, + "eval_samples_per_second": 539.64, + "eval_steps_per_second": 8.525, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 3.3564131989680246e-05, + "loss": 0.98, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 5.034619798452037e-05, + "loss": 0.873, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9049334811529933, + "eval_loss": 0.8445682525634766, + "eval_runtime": 6.7651, + "eval_samples_per_second": 533.329, + "eval_steps_per_second": 8.426, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 6.712826397936049e-05, + "loss": 0.8209, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.8063568472862244, + "eval_runtime": 6.6845, + "eval_samples_per_second": 539.755, + "eval_steps_per_second": 8.527, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 8.391032997420061e-05, + "loss": 0.7995, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00010069239596904074, + "loss": 0.7895, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8422191143035889, + "eval_runtime": 6.7027, + "eval_samples_per_second": 538.287, + "eval_steps_per_second": 8.504, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00011747446196388085, + "loss": 0.7789, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00013425652795872098, + "loss": 0.7762, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9193458980044346, + "eval_loss": 0.7983577251434326, + "eval_runtime": 7.0047, + "eval_samples_per_second": 515.085, + "eval_steps_per_second": 8.137, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00015103859395356107, + "loss": 0.7707, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8985587583148559, + "eval_loss": 0.8174644112586975, + "eval_runtime": 6.818, + "eval_samples_per_second": 529.187, + "eval_steps_per_second": 8.36, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00016782065994840121, + "loss": 0.7624, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00018460272594324133, + "loss": 0.7565, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9113082039911308, + "eval_loss": 0.8123602271080017, + "eval_runtime": 7.1584, + "eval_samples_per_second": 504.021, + "eval_steps_per_second": 7.963, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00020138479193808147, + "loss": 0.7599, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00021816685793292156, + "loss": 0.7519, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9074279379157428, + "eval_loss": 0.8221088647842407, + "eval_runtime": 6.9443, + "eval_samples_per_second": 519.563, + "eval_steps_per_second": 8.208, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0002349489239277617, + "loss": 0.743, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8342572062084257, + "eval_loss": 0.9307757019996643, + "eval_runtime": 6.7109, + "eval_samples_per_second": 537.634, + "eval_steps_per_second": 8.494, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0002517309899226018, + "loss": 0.7554, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00026851305591744196, + "loss": 0.747, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8883037694013304, + "eval_loss": 0.852607011795044, + "eval_runtime": 6.9181, + "eval_samples_per_second": 521.531, + "eval_steps_per_second": 8.239, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0002685748617990294, + "loss": 0.7491, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8259668350219727, + "eval_runtime": 7.1939, + "eval_samples_per_second": 501.537, + "eval_steps_per_second": 7.923, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00026673511858293306, + "loss": 0.7454, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00026365561454264595, + "loss": 0.7389, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9079822616407982, + "eval_loss": 0.8228020668029785, + "eval_runtime": 7.0289, + "eval_samples_per_second": 513.308, + "eval_steps_per_second": 8.109, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0002593652386620386, + "loss": 0.7384, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00025390423917523956, + "loss": 0.7339, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9038248337028825, + "eval_loss": 0.8257579803466797, + "eval_runtime": 7.1, + "eval_samples_per_second": 508.172, + "eval_steps_per_second": 8.028, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0002473238459959062, + "loss": 0.728, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9049334811529933, + "eval_loss": 0.8260374665260315, + "eval_runtime": 6.9642, + "eval_samples_per_second": 518.077, + "eval_steps_per_second": 8.185, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00023968579012680159, + "loss": 0.7391, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00023106172455812085, + "loss": 0.7224, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8655764966740577, + "eval_loss": 0.8903546929359436, + "eval_runtime": 7.1236, + "eval_samples_per_second": 506.487, + "eval_steps_per_second": 8.002, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00022153255208714982, + "loss": 0.7306, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00021118766636501292, + "loss": 0.7205, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9041019955654102, + "eval_loss": 0.8240099549293518, + "eval_runtime": 6.8831, + "eval_samples_per_second": 524.184, + "eval_steps_per_second": 8.281, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0002001241132902898, + "loss": 0.7251, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9124168514412417, + "eval_loss": 0.8179566860198975, + "eval_runtime": 6.4667, + "eval_samples_per_second": 557.939, + "eval_steps_per_second": 8.814, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.0001884456806165101, + "loss": 0.7108, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00017626192431396392, + "loss": 0.7159, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.8147507309913635, + "eval_runtime": 6.8379, + "eval_samples_per_second": 527.651, + "eval_steps_per_second": 8.336, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00016368714081957823, + "loss": 0.7093, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.0001508392948162346, + "loss": 0.7126, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.8996674057649667, + "eval_loss": 0.8287857174873352, + "eval_runtime": 6.6222, + "eval_samples_per_second": 544.836, + "eval_steps_per_second": 8.607, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0001378389126000853, + "loss": 0.7125, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.807800829410553, + "eval_runtime": 6.4865, + "eval_samples_per_second": 556.231, + "eval_steps_per_second": 8.787, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00012480795141724542, + "loss": 0.7054, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00011186865537667, + "loss": 0.7021, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8086454272270203, + "eval_runtime": 6.8665, + "eval_samples_per_second": 525.451, + "eval_steps_per_second": 8.301, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 9.914240867195727e-05, + "loss": 0.6957, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.815013587474823, + "eval_runtime": 6.8071, + "eval_samples_per_second": 530.034, + "eval_steps_per_second": 8.374, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 8.674859687006062e-05, + "loss": 0.6966, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 7.480348694921928e-05, + "loss": 0.6969, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9215631929046563, + "eval_loss": 0.8047890067100525, + "eval_runtime": 6.6868, + "eval_samples_per_second": 539.572, + "eval_steps_per_second": 8.524, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 6.341913659252787e-05, + "loss": 0.6955, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 5.2702342969117676e-05, + "loss": 0.6939, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.803421139717102, + "eval_runtime": 6.7218, + "eval_samples_per_second": 536.763, + "eval_steps_per_second": 8.48, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 4.275364086448689e-05, + "loss": 0.6894, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8005704283714294, + "eval_runtime": 6.8206, + "eval_samples_per_second": 528.987, + "eval_steps_per_second": 8.357, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 3.366635955856929e-05, + "loss": 0.6895, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.5525747299017654e-05, + "loss": 0.6887, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8079735040664673, + "eval_runtime": 6.8653, + "eval_samples_per_second": 525.543, + "eval_steps_per_second": 8.303, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 1.840817158306065e-05, + "loss": 0.6854, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.2380402750131451e-05, + "loss": 0.692, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9201773835920177, + "eval_loss": 0.8051853775978088, + "eval_runtime": 6.6356, + "eval_samples_per_second": 543.735, + "eval_steps_per_second": 8.59, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 7.49898760592083e-06, + "loss": 0.6897, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9246119733924612, + "eval_loss": 0.7973034381866455, + "eval_runtime": 6.78, + "eval_samples_per_second": 532.151, + "eval_steps_per_second": 8.407, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 3.8097189539196827e-06, + "loss": 0.6833, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.3472060108030543e-06, + "loss": 0.6867, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8028795123100281, + "eval_runtime": 6.9548, + "eval_samples_per_second": 518.782, + "eval_steps_per_second": 8.196, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 1.34549735609797e-07, + "loss": 0.6885, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9243348115299335, + "eval_loss": 0.7951526641845703, + "eval_runtime": 6.535, + "eval_samples_per_second": 552.103, + "eval_steps_per_second": 8.722, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00026915851999416657, + "metric": "eval/loss", + "warmup_ratio": 0.330612213985626 + } +} diff --git a/run-syiegpd6/checkpoint-1260/training_args.bin b/run-syiegpd6/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8e2f3e00344fd3858ae85ebdb433294e6f5fafb --- /dev/null +++ b/run-syiegpd6/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1fe202d72d38e88746a685486e93d2c032ac2589f11862308b117e3f8103f8b +size 4792 diff --git a/run-tc087f50/checkpoint-616/model.safetensors b/run-tc087f50/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c20243305521411824b95c2d316cd2b2462e6df6 --- /dev/null +++ b/run-tc087f50/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b6c4e817bab339cb38980c5dcda9c4e60a037ce15f9a29f9311470c0ef1f26d +size 198025308 diff --git a/run-tc087f50/checkpoint-616/optimizer.pt b/run-tc087f50/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c29d0d60b930de2d398131dcadcd6e7347245eec --- /dev/null +++ b/run-tc087f50/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bee113bda0d6e761026d273accee22838e4c17319e1d6b50669aa7026147dd4 +size 395900602 diff --git a/run-tc087f50/checkpoint-616/rng_state.pth b/run-tc087f50/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-tc087f50/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-tc087f50/checkpoint-616/scheduler.pt b/run-tc087f50/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4026d3eebae9ce4e103999d1f374590502090ff3 --- /dev/null +++ b/run-tc087f50/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89f63b1ac7eb43a44929464c509d5457a0f177c7c367a2935a7a9fedbfb549a7 +size 1064 diff --git a/run-tc087f50/checkpoint-616/trainer_state.json b/run-tc087f50/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..81089f8bafcb6d97abbdf36d3a70c984a3caf4f3 --- /dev/null +++ b/run-tc087f50/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9015699139945296, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-tc087f50/checkpoint-446", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 7.825523070831868e-07, + "loss": 1.5366, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.46147450110864746, + "eval_f1": 0.543503629480541, + "eval_loss": 1.5072089433670044, + "eval_precision": 0.6849772994427523, + "eval_recall": 0.46147450110864746, + "eval_runtime": 8.1831, + "eval_samples_per_second": 440.91, + "eval_steps_per_second": 3.544, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 1.5651046141663736e-06, + "loss": 1.5153, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 2.3476569212495604e-06, + "loss": 1.4663, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.7161862527716186, + "eval_f1": 0.7082378548950855, + "eval_loss": 1.414753794670105, + "eval_precision": 0.7032588436684571, + "eval_recall": 0.7161862527716186, + "eval_runtime": 7.8255, + "eval_samples_per_second": 461.057, + "eval_steps_per_second": 3.706, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 3.1302092283327473e-06, + "loss": 1.3999, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8203991130820399, + "eval_f1": 0.7493930758204594, + "eval_loss": 1.260522484779358, + "eval_precision": 0.7232890019559242, + "eval_recall": 0.8203991130820399, + "eval_runtime": 8.0189, + "eval_samples_per_second": 449.937, + "eval_steps_per_second": 3.616, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 3.912761535415934e-06, + "loss": 1.3056, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 4.695313842499121e-06, + "loss": 1.1854, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.7513704434640557, + "eval_loss": 1.0719598531723022, + "eval_precision": 0.7699235349840203, + "eval_recall": 0.8287139689578714, + "eval_runtime": 8.1228, + "eval_samples_per_second": 444.184, + "eval_steps_per_second": 3.57, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 5.477866149582308e-06, + "loss": 1.0815, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 5.7146592981701326e-06, + "loss": 0.9912, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9733331799507141, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.7063, + "eval_samples_per_second": 468.188, + "eval_steps_per_second": 3.763, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 5.694824599552223e-06, + "loss": 0.9589, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8398004434589801, + "eval_f1": 0.7769258943303846, + "eval_loss": 0.9288539886474609, + "eval_precision": 0.7676069969016963, + "eval_recall": 0.8398004434589801, + "eval_runtime": 8.1577, + "eval_samples_per_second": 442.281, + "eval_steps_per_second": 3.555, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 5.658474183253441e-06, + "loss": 0.9347, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 5.605819776818873e-06, + "loss": 0.9119, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8603104212860311, + "eval_f1": 0.8148146918930934, + "eval_loss": 0.897085428237915, + "eval_precision": 0.8595360810373311, + "eval_recall": 0.8603104212860311, + "eval_runtime": 7.6018, + "eval_samples_per_second": 474.626, + "eval_steps_per_second": 3.815, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 5.537168072422846e-06, + "loss": 0.9037, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 5.452918940502006e-06, + "loss": 0.8774, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8736141906873615, + "eval_f1": 0.8407481182455367, + "eval_loss": 0.8833092451095581, + "eval_precision": 0.8635484967383006, + "eval_recall": 0.8736141906873615, + "eval_runtime": 8.1782, + "eval_samples_per_second": 441.171, + "eval_steps_per_second": 3.546, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 5.353563100659965e-06, + "loss": 0.8692, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.883869179600887, + "eval_f1": 0.8595664190059464, + "eval_loss": 0.8708497285842896, + "eval_precision": 0.876543691868009, + "eval_recall": 0.883869179600887, + "eval_runtime": 7.6158, + "eval_samples_per_second": 473.753, + "eval_steps_per_second": 3.808, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 5.239679263409603e-06, + "loss": 0.8708, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 5.111930759401325e-06, + "loss": 0.8642, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8902439024390244, + "eval_f1": 0.8706002939081835, + "eval_loss": 0.857717752456665, + "eval_precision": 0.8864692880842098, + "eval_recall": 0.8902439024390244, + "eval_runtime": 7.9859, + "eval_samples_per_second": 451.799, + "eval_steps_per_second": 3.631, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 4.971061675770743e-06, + "loss": 0.8487, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.891629711751663, + "eval_f1": 0.8758911374184237, + "eval_loss": 0.8475823402404785, + "eval_precision": 0.8841603734593992, + "eval_recall": 0.891629711751663, + "eval_runtime": 8.0817, + "eval_samples_per_second": 446.442, + "eval_steps_per_second": 3.588, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 4.81789252211011e-06, + "loss": 0.8573, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 4.653315451307604e-06, + "loss": 0.8435, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8988359201773836, + "eval_f1": 0.8847093433953636, + "eval_loss": 0.8407472372055054, + "eval_precision": 0.8895883534490413, + "eval_recall": 0.8988359201773836, + "eval_runtime": 7.9939, + "eval_samples_per_second": 451.345, + "eval_steps_per_second": 3.628, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 4.478289063091274e-06, + "loss": 0.8369, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 4.293832820545054e-06, + "loss": 0.8358, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8977272727272727, + "eval_f1": 0.8857868612751701, + "eval_loss": 0.8404232263565063, + "eval_precision": 0.8877721748068556, + "eval_recall": 0.8977272727272727, + "eval_runtime": 8.0417, + "eval_samples_per_second": 448.659, + "eval_steps_per_second": 3.606, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 4.1010211121185555e-06, + "loss": 0.8272, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8982815964523282, + "eval_f1": 0.8864563790356156, + "eval_loss": 0.83742356300354, + "eval_precision": 0.8881012687639953, + "eval_recall": 0.8982815964523282, + "eval_runtime": 7.6711, + "eval_samples_per_second": 470.336, + "eval_steps_per_second": 3.78, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 3.900976993717205e-06, + "loss": 0.8288, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 3.694865647322682e-06, + "loss": 0.8284, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9021618625277162, + "eval_f1": 0.8919323879231678, + "eval_loss": 0.8299400806427002, + "eval_precision": 0.8899853911035873, + "eval_recall": 0.9021618625277162, + "eval_runtime": 7.8566, + "eval_samples_per_second": 459.23, + "eval_steps_per_second": 3.691, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 3.483887594244775e-06, + "loss": 0.8187, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 3.269271702534901e-06, + "loss": 0.8232, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9007760532150776, + "eval_f1": 0.8900669756270255, + "eval_loss": 0.8269869685173035, + "eval_precision": 0.8898728723784425, + "eval_recall": 0.9007760532150776, + "eval_runtime": 7.9497, + "eval_samples_per_second": 453.851, + "eval_steps_per_second": 3.648, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 3.0522680292904683e-06, + "loss": 0.8173, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.8920077052833563, + "eval_loss": 0.8250635862350464, + "eval_precision": 0.8890677921992146, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.8955, + "eval_samples_per_second": 456.967, + "eval_steps_per_second": 3.673, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 2.8341405395410117e-06, + "loss": 0.8187, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 2.6161597441258218e-06, + "loss": 0.8136, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9035476718403548, + "eval_f1": 0.8948359204851519, + "eval_loss": 0.8239994049072266, + "eval_precision": 0.8925147329223945, + "eval_recall": 0.9035476718403548, + "eval_runtime": 8.0756, + "eval_samples_per_second": 446.777, + "eval_steps_per_second": 3.591, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 2.3995952994446972e-06, + "loss": 0.8124, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 2.185708612185482e-06, + "loss": 0.8099, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9035476718403548, + "eval_f1": 0.8932992657149587, + "eval_loss": 0.8201805353164673, + "eval_precision": 0.8926646365293553, + "eval_recall": 0.9035476718403548, + "eval_runtime": 7.4107, + "eval_samples_per_second": 486.865, + "eval_steps_per_second": 3.913, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 1.9757454921030833e-06, + "loss": 0.809, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.8929836921369333, + "eval_loss": 0.8218014240264893, + "eval_precision": 0.890667964163023, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.623, + "eval_samples_per_second": 473.303, + "eval_steps_per_second": 3.804, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 1.7709288956447703e-06, + "loss": 0.813, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 1.5724518026874334e-06, + "loss": 0.8083, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.9015699139945296, + "eval_loss": 0.8185763955116272, + "eval_precision": 0.8991606392883312, + "eval_recall": 0.9090909090909091, + "eval_runtime": 7.685, + "eval_samples_per_second": 469.488, + "eval_steps_per_second": 3.774, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 1.381470267877108e-06, + "loss": 0.8131, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9046563192904656, + "eval_f1": 0.8960601855203288, + "eval_loss": 0.8211078643798828, + "eval_precision": 0.8932721527546629, + "eval_recall": 0.9046563192904656, + "eval_runtime": 7.9727, + "eval_samples_per_second": 452.544, + "eval_steps_per_second": 3.637, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 1.199096687044141e-06, + "loss": 0.8049, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 1.026393317914576e-06, + "loss": 0.8064, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9082594235033259, + "eval_f1": 0.9005980988677819, + "eval_loss": 0.8150145411491394, + "eval_precision": 0.8983880287211945, + "eval_recall": 0.9082594235033259, + "eval_runtime": 8.0567, + "eval_samples_per_second": 447.828, + "eval_steps_per_second": 3.6, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 8.643660928571986e-07, + "loss": 0.8061, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 7.13958759704675e-07, + "loss": 0.8054, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.905210643015521, + "eval_f1": 0.8977369767565613, + "eval_loss": 0.8158100247383118, + "eval_precision": 0.895893633259947, + "eval_recall": 0.905210643015521, + "eval_runtime": 7.7883, + "eval_samples_per_second": 463.261, + "eval_steps_per_second": 3.724, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 5.760473847762896e-07, + "loss": 0.8036, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9003024938294941, + "eval_loss": 0.8164046406745911, + "eval_precision": 0.8976899432836397, + "eval_recall": 0.9088137472283814, + "eval_runtime": 8.0644, + "eval_samples_per_second": 447.401, + "eval_steps_per_second": 3.596, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 4.514352501201184e-07, + "loss": 0.8021, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 3.4084817469630267e-07, + "loss": 0.8071, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9082594235033259, + "eval_f1": 0.900355602919387, + "eval_loss": 0.8134579062461853, + "eval_precision": 0.8978478131954157, + "eval_recall": 0.9082594235033259, + "eval_runtime": 7.4682, + "eval_samples_per_second": 483.113, + "eval_steps_per_second": 3.883, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 2.449302867537536e-07, + "loss": 0.8102, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 1.6424027202462116e-07, + "loss": 0.8001, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.898782406467973, + "eval_loss": 0.8171702027320862, + "eval_precision": 0.8954747155430337, + "eval_recall": 0.9068736141906873, + "eval_runtime": 7.6554, + "eval_samples_per_second": 471.303, + "eval_steps_per_second": 3.788, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 9.924811958939133e-08, + "loss": 0.7998, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9009586571176782, + "eval_loss": 0.8134816884994507, + "eval_precision": 0.8979096680328688, + "eval_recall": 0.9088137472283814, + "eval_runtime": 8.204, + "eval_samples_per_second": 439.785, + "eval_steps_per_second": 3.535, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 5.0332384366716324e-08, + "loss": 0.8127, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.7777982172954952e-08, + "loss": 0.8013, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9082594235033259, + "eval_f1": 0.9001837162870608, + "eval_loss": 0.8134917616844177, + "eval_precision": 0.8975442963035055, + "eval_recall": 0.9082594235033259, + "eval_runtime": 8.0752, + "eval_samples_per_second": 446.799, + "eval_steps_per_second": 3.591, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.3097021608461137, + "learning_rate": 5.718651474838673e-06, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-tc087f50/checkpoint-616/training_args.bin b/run-tc087f50/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..921fc4a5544f242f30ac88a81b691c9a67c638a2 --- /dev/null +++ b/run-tc087f50/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5beed2f567febdfd07539acba72ec9fa21c6e877a8b5c2675b26b0af90fa913 +size 4792 diff --git a/run-tc087f50/checkpoint-630/model.safetensors b/run-tc087f50/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0fe33c5978cfe4df41522c4fe3ba852c7fcc3128 --- /dev/null +++ b/run-tc087f50/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4932c6d35d3ca697cd386724cbe005ab4dc97752cb84c2e3ef2c1e8d1b98eb0f +size 198025308 diff --git a/run-tc087f50/checkpoint-630/optimizer.pt b/run-tc087f50/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..57358669fa33bac404c4e72dca275cb30554d772 --- /dev/null +++ b/run-tc087f50/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acaffcddb90135443f0bdf156054e652c54ed5bb194dcf6626f330a6deab994b +size 395900602 diff --git a/run-tc087f50/checkpoint-630/rng_state.pth b/run-tc087f50/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-tc087f50/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-tc087f50/checkpoint-630/scheduler.pt b/run-tc087f50/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6130bf879dac96d37fcaa5f2094f47052d3f457a --- /dev/null +++ b/run-tc087f50/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8a8e7dbc9fdae8fb5e0b607d8ee9b07d7d4fc47643143f4e827d64e97b10728 +size 1064 diff --git a/run-tc087f50/checkpoint-630/trainer_state.json b/run-tc087f50/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..051545ee4615b599b77e810eb6c19d47484c974f --- /dev/null +++ b/run-tc087f50/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9016704166656572, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-tc087f50/checkpoint-630", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 7.825523070831868e-07, + "loss": 1.5366, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.46147450110864746, + "eval_f1": 0.543503629480541, + "eval_loss": 1.5072089433670044, + "eval_precision": 0.6849772994427523, + "eval_recall": 0.46147450110864746, + "eval_runtime": 8.1831, + "eval_samples_per_second": 440.91, + "eval_steps_per_second": 3.544, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 1.5651046141663736e-06, + "loss": 1.5153, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 2.3476569212495604e-06, + "loss": 1.4663, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.7161862527716186, + "eval_f1": 0.7082378548950855, + "eval_loss": 1.414753794670105, + "eval_precision": 0.7032588436684571, + "eval_recall": 0.7161862527716186, + "eval_runtime": 7.8255, + "eval_samples_per_second": 461.057, + "eval_steps_per_second": 3.706, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 3.1302092283327473e-06, + "loss": 1.3999, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8203991130820399, + "eval_f1": 0.7493930758204594, + "eval_loss": 1.260522484779358, + "eval_precision": 0.7232890019559242, + "eval_recall": 0.8203991130820399, + "eval_runtime": 8.0189, + "eval_samples_per_second": 449.937, + "eval_steps_per_second": 3.616, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 3.912761535415934e-06, + "loss": 1.3056, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 4.695313842499121e-06, + "loss": 1.1854, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.7513704434640557, + "eval_loss": 1.0719598531723022, + "eval_precision": 0.7699235349840203, + "eval_recall": 0.8287139689578714, + "eval_runtime": 8.1228, + "eval_samples_per_second": 444.184, + "eval_steps_per_second": 3.57, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 5.477866149582308e-06, + "loss": 1.0815, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 5.7146592981701326e-06, + "loss": 0.9912, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9733331799507141, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 7.7063, + "eval_samples_per_second": 468.188, + "eval_steps_per_second": 3.763, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 5.694824599552223e-06, + "loss": 0.9589, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8398004434589801, + "eval_f1": 0.7769258943303846, + "eval_loss": 0.9288539886474609, + "eval_precision": 0.7676069969016963, + "eval_recall": 0.8398004434589801, + "eval_runtime": 8.1577, + "eval_samples_per_second": 442.281, + "eval_steps_per_second": 3.555, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 5.658474183253441e-06, + "loss": 0.9347, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 5.605819776818873e-06, + "loss": 0.9119, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8603104212860311, + "eval_f1": 0.8148146918930934, + "eval_loss": 0.897085428237915, + "eval_precision": 0.8595360810373311, + "eval_recall": 0.8603104212860311, + "eval_runtime": 7.6018, + "eval_samples_per_second": 474.626, + "eval_steps_per_second": 3.815, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 5.537168072422846e-06, + "loss": 0.9037, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 5.452918940502006e-06, + "loss": 0.8774, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8736141906873615, + "eval_f1": 0.8407481182455367, + "eval_loss": 0.8833092451095581, + "eval_precision": 0.8635484967383006, + "eval_recall": 0.8736141906873615, + "eval_runtime": 8.1782, + "eval_samples_per_second": 441.171, + "eval_steps_per_second": 3.546, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 5.353563100659965e-06, + "loss": 0.8692, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.883869179600887, + "eval_f1": 0.8595664190059464, + "eval_loss": 0.8708497285842896, + "eval_precision": 0.876543691868009, + "eval_recall": 0.883869179600887, + "eval_runtime": 7.6158, + "eval_samples_per_second": 473.753, + "eval_steps_per_second": 3.808, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 5.239679263409603e-06, + "loss": 0.8708, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 5.111930759401325e-06, + "loss": 0.8642, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8902439024390244, + "eval_f1": 0.8706002939081835, + "eval_loss": 0.857717752456665, + "eval_precision": 0.8864692880842098, + "eval_recall": 0.8902439024390244, + "eval_runtime": 7.9859, + "eval_samples_per_second": 451.799, + "eval_steps_per_second": 3.631, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 4.971061675770743e-06, + "loss": 0.8487, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.891629711751663, + "eval_f1": 0.8758911374184237, + "eval_loss": 0.8475823402404785, + "eval_precision": 0.8841603734593992, + "eval_recall": 0.891629711751663, + "eval_runtime": 8.0817, + "eval_samples_per_second": 446.442, + "eval_steps_per_second": 3.588, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 4.81789252211011e-06, + "loss": 0.8573, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 4.653315451307604e-06, + "loss": 0.8435, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8988359201773836, + "eval_f1": 0.8847093433953636, + "eval_loss": 0.8407472372055054, + "eval_precision": 0.8895883534490413, + "eval_recall": 0.8988359201773836, + "eval_runtime": 7.9939, + "eval_samples_per_second": 451.345, + "eval_steps_per_second": 3.628, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 4.478289063091274e-06, + "loss": 0.8369, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 4.293832820545054e-06, + "loss": 0.8358, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8977272727272727, + "eval_f1": 0.8857868612751701, + "eval_loss": 0.8404232263565063, + "eval_precision": 0.8877721748068556, + "eval_recall": 0.8977272727272727, + "eval_runtime": 8.0417, + "eval_samples_per_second": 448.659, + "eval_steps_per_second": 3.606, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 4.1010211121185555e-06, + "loss": 0.8272, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8982815964523282, + "eval_f1": 0.8864563790356156, + "eval_loss": 0.83742356300354, + "eval_precision": 0.8881012687639953, + "eval_recall": 0.8982815964523282, + "eval_runtime": 7.6711, + "eval_samples_per_second": 470.336, + "eval_steps_per_second": 3.78, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 3.900976993717205e-06, + "loss": 0.8288, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 3.694865647322682e-06, + "loss": 0.8284, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9021618625277162, + "eval_f1": 0.8919323879231678, + "eval_loss": 0.8299400806427002, + "eval_precision": 0.8899853911035873, + "eval_recall": 0.9021618625277162, + "eval_runtime": 7.8566, + "eval_samples_per_second": 459.23, + "eval_steps_per_second": 3.691, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 3.483887594244775e-06, + "loss": 0.8187, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 3.269271702534901e-06, + "loss": 0.8232, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9007760532150776, + "eval_f1": 0.8900669756270255, + "eval_loss": 0.8269869685173035, + "eval_precision": 0.8898728723784425, + "eval_recall": 0.9007760532150776, + "eval_runtime": 7.9497, + "eval_samples_per_second": 453.851, + "eval_steps_per_second": 3.648, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 3.0522680292904683e-06, + "loss": 0.8173, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.8920077052833563, + "eval_loss": 0.8250635862350464, + "eval_precision": 0.8890677921992146, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.8955, + "eval_samples_per_second": 456.967, + "eval_steps_per_second": 3.673, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 2.8341405395410117e-06, + "loss": 0.8187, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 2.6161597441258218e-06, + "loss": 0.8136, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9035476718403548, + "eval_f1": 0.8948359204851519, + "eval_loss": 0.8239994049072266, + "eval_precision": 0.8925147329223945, + "eval_recall": 0.9035476718403548, + "eval_runtime": 8.0756, + "eval_samples_per_second": 446.777, + "eval_steps_per_second": 3.591, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 2.3995952994446972e-06, + "loss": 0.8124, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 2.185708612185482e-06, + "loss": 0.8099, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9035476718403548, + "eval_f1": 0.8932992657149587, + "eval_loss": 0.8201805353164673, + "eval_precision": 0.8926646365293553, + "eval_recall": 0.9035476718403548, + "eval_runtime": 7.4107, + "eval_samples_per_second": 486.865, + "eval_steps_per_second": 3.913, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 1.9757454921030833e-06, + "loss": 0.809, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9010532150776053, + "eval_f1": 0.8929836921369333, + "eval_loss": 0.8218014240264893, + "eval_precision": 0.890667964163023, + "eval_recall": 0.9010532150776053, + "eval_runtime": 7.623, + "eval_samples_per_second": 473.303, + "eval_steps_per_second": 3.804, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 1.7709288956447703e-06, + "loss": 0.813, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 1.5724518026874334e-06, + "loss": 0.8083, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.9015699139945296, + "eval_loss": 0.8185763955116272, + "eval_precision": 0.8991606392883312, + "eval_recall": 0.9090909090909091, + "eval_runtime": 7.685, + "eval_samples_per_second": 469.488, + "eval_steps_per_second": 3.774, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 1.381470267877108e-06, + "loss": 0.8131, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9046563192904656, + "eval_f1": 0.8960601855203288, + "eval_loss": 0.8211078643798828, + "eval_precision": 0.8932721527546629, + "eval_recall": 0.9046563192904656, + "eval_runtime": 7.9727, + "eval_samples_per_second": 452.544, + "eval_steps_per_second": 3.637, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 1.199096687044141e-06, + "loss": 0.8049, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 1.026393317914576e-06, + "loss": 0.8064, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9082594235033259, + "eval_f1": 0.9005980988677819, + "eval_loss": 0.8150145411491394, + "eval_precision": 0.8983880287211945, + "eval_recall": 0.9082594235033259, + "eval_runtime": 8.0567, + "eval_samples_per_second": 447.828, + "eval_steps_per_second": 3.6, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 8.643660928571986e-07, + "loss": 0.8061, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 7.13958759704675e-07, + "loss": 0.8054, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.905210643015521, + "eval_f1": 0.8977369767565613, + "eval_loss": 0.8158100247383118, + "eval_precision": 0.895893633259947, + "eval_recall": 0.905210643015521, + "eval_runtime": 7.7883, + "eval_samples_per_second": 463.261, + "eval_steps_per_second": 3.724, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 5.760473847762896e-07, + "loss": 0.8036, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9003024938294941, + "eval_loss": 0.8164046406745911, + "eval_precision": 0.8976899432836397, + "eval_recall": 0.9088137472283814, + "eval_runtime": 8.0644, + "eval_samples_per_second": 447.401, + "eval_steps_per_second": 3.596, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 4.514352501201184e-07, + "loss": 0.8021, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 3.4084817469630267e-07, + "loss": 0.8071, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9082594235033259, + "eval_f1": 0.900355602919387, + "eval_loss": 0.8134579062461853, + "eval_precision": 0.8978478131954157, + "eval_recall": 0.9082594235033259, + "eval_runtime": 7.4682, + "eval_samples_per_second": 483.113, + "eval_steps_per_second": 3.883, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 2.449302867537536e-07, + "loss": 0.8102, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 1.6424027202462116e-07, + "loss": 0.8001, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9068736141906873, + "eval_f1": 0.898782406467973, + "eval_loss": 0.8171702027320862, + "eval_precision": 0.8954747155430337, + "eval_recall": 0.9068736141906873, + "eval_runtime": 7.6554, + "eval_samples_per_second": 471.303, + "eval_steps_per_second": 3.788, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 9.924811958939133e-08, + "loss": 0.7998, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9088137472283814, + "eval_f1": 0.9009586571176782, + "eval_loss": 0.8134816884994507, + "eval_precision": 0.8979096680328688, + "eval_recall": 0.9088137472283814, + "eval_runtime": 8.204, + "eval_samples_per_second": 439.785, + "eval_steps_per_second": 3.535, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 5.0332384366716324e-08, + "loss": 0.8127, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.7777982172954952e-08, + "loss": 0.8013, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9082594235033259, + "eval_f1": 0.9001837162870608, + "eval_loss": 0.8134917616844177, + "eval_precision": 0.8975442963035055, + "eval_recall": 0.9082594235033259, + "eval_runtime": 8.0752, + "eval_samples_per_second": 446.799, + "eval_steps_per_second": 3.591, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 1.774530194374927e-09, + "loss": 0.8056, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.9016704166656572, + "eval_loss": 0.8160110116004944, + "eval_precision": 0.8987786946646469, + "eval_recall": 0.9093680709534369, + "eval_runtime": 7.8549, + "eval_samples_per_second": 459.332, + "eval_steps_per_second": 3.692, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.3097021608461137, + "learning_rate": 5.718651474838673e-06, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-tc087f50/checkpoint-630/training_args.bin b/run-tc087f50/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..921fc4a5544f242f30ac88a81b691c9a67c638a2 --- /dev/null +++ b/run-tc087f50/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5beed2f567febdfd07539acba72ec9fa21c6e877a8b5c2675b26b0af90fa913 +size 4792 diff --git a/run-tdm55qka/checkpoint-1232/model.safetensors b/run-tdm55qka/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75c0c9819e265eb4a062e8aa6e879294a38e73eb --- /dev/null +++ b/run-tdm55qka/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38e01cebdfb78f1874b01398c7991c26ec6a18cb0d79b27626a12d2e6f7c780c +size 198025308 diff --git a/run-tdm55qka/checkpoint-1232/optimizer.pt b/run-tdm55qka/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..be9190f2667b30869fcb672e28ddb2b99708aa2f --- /dev/null +++ b/run-tdm55qka/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9941a63f5cbc216d199f3ed975045855fac8b3fd5f846220237a7c6892f60cd7 +size 395900602 diff --git a/run-tdm55qka/checkpoint-1232/rng_state.pth b/run-tdm55qka/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-tdm55qka/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-tdm55qka/checkpoint-1232/scheduler.pt b/run-tdm55qka/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9376ba1123f28334f527f2d822ff0cba2ce4df8a --- /dev/null +++ b/run-tdm55qka/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a173512206fdadc865b4d7425575a4f668d3d0cf0d6775d9da5dbe32007a61a +size 1064 diff --git a/run-tdm55qka/checkpoint-1232/trainer_state.json b/run-tdm55qka/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f7ec630132295afebcff5c0fa3cfa14caa0fd5d5 --- /dev/null +++ b/run-tdm55qka/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9157427937915743, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-tdm55qka/checkpoint-1062", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 7.074330885930957e-05, + "loss": 1.2131, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8902439024390244, + "eval_loss": 0.9553517699241638, + "eval_runtime": 6.7205, + "eval_samples_per_second": 536.864, + "eval_steps_per_second": 8.482, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00014148661771861914, + "loss": 0.8629, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021222992657792873, + "loss": 0.8113, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8628048780487805, + "eval_loss": 0.9110397100448608, + "eval_runtime": 6.8183, + "eval_samples_per_second": 529.161, + "eval_steps_per_second": 8.36, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0002829732354372383, + "loss": 0.7987, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9032705099778271, + "eval_loss": 0.835228681564331, + "eval_runtime": 6.8077, + "eval_samples_per_second": 529.986, + "eval_steps_per_second": 8.373, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00035371654429654784, + "loss": 0.7993, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00042445985315585745, + "loss": 0.7879, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6377494456762749, + "eval_loss": 1.2046802043914795, + "eval_runtime": 6.6938, + "eval_samples_per_second": 539.002, + "eval_steps_per_second": 8.515, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0004952031620151671, + "loss": 0.7941, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0005659464708744766, + "loss": 0.7997, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9018847006651884, + "eval_loss": 0.8322978615760803, + "eval_runtime": 6.7732, + "eval_samples_per_second": 532.688, + "eval_steps_per_second": 8.416, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0006366897797337862, + "loss": 0.7972, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8553215077605322, + "eval_loss": 0.9102441072463989, + "eval_runtime": 6.9627, + "eval_samples_per_second": 518.19, + "eval_steps_per_second": 8.186, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0007074330885930957, + "loss": 0.8132, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0007781763974524053, + "loss": 0.82, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.7907427937915743, + "eval_loss": 0.9799911379814148, + "eval_runtime": 6.7596, + "eval_samples_per_second": 533.757, + "eval_steps_per_second": 8.432, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0008489197063117149, + "loss": 0.8365, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0008949685513502196, + "loss": 0.8214, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8802660753880266, + "eval_loss": 0.8667421340942383, + "eval_runtime": 6.7798, + "eval_samples_per_second": 532.167, + "eval_steps_per_second": 8.407, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0008920569222601872, + "loss": 0.815, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8830376940133038, + "eval_loss": 0.8685916066169739, + "eval_runtime": 6.9552, + "eval_samples_per_second": 518.747, + "eval_steps_per_second": 8.195, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0008857262042472297, + "loss": 0.8304, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0008760250964691541, + "loss": 0.8286, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.858370288248337, + "eval_loss": 0.9249398708343506, + "eval_runtime": 6.9742, + "eval_samples_per_second": 517.333, + "eval_steps_per_second": 8.173, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0008630282248653272, + "loss": 0.8173, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.858370288248337, + "eval_loss": 0.9206698536872864, + "eval_runtime": 6.7811, + "eval_samples_per_second": 532.064, + "eval_steps_per_second": 8.406, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0008468355680953472, + "loss": 0.8171, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0008275716884514749, + "loss": 0.8195, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8727827050997783, + "eval_loss": 0.8787004947662354, + "eval_runtime": 7.0001, + "eval_samples_per_second": 515.421, + "eval_steps_per_second": 8.143, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0008053847736610466, + "loss": 0.8162, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0007804454969498183, + "loss": 0.8188, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8910753880266076, + "eval_loss": 0.8523863554000854, + "eval_runtime": 6.7373, + "eval_samples_per_second": 535.526, + "eval_steps_per_second": 8.46, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0007529457041352247, + "loss": 0.8004, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.871119733924612, + "eval_loss": 0.8795638084411621, + "eval_runtime": 6.8513, + "eval_samples_per_second": 526.615, + "eval_steps_per_second": 8.32, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0007230969378491112, + "loss": 0.8106, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0006911288102423787, + "loss": 0.7925, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9007760532150776, + "eval_loss": 0.830922544002533, + "eval_runtime": 6.7362, + "eval_samples_per_second": 535.617, + "eval_steps_per_second": 8.462, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0006572872366895424, + "loss": 0.7981, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0006218325440804596, + "loss": 0.7874, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8747228381374723, + "eval_loss": 0.8838471174240112, + "eval_runtime": 6.887, + "eval_samples_per_second": 523.885, + "eval_steps_per_second": 8.276, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0005850374682512273, + "loss": 0.7818, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9060421286031042, + "eval_loss": 0.8282538652420044, + "eval_runtime": 6.5474, + "eval_samples_per_second": 551.062, + "eval_steps_per_second": 8.706, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.000547185055959046, + "loss": 0.7736, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0005085664875401447, + "loss": 0.7698, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8233076333999634, + "eval_runtime": 6.7542, + "eval_samples_per_second": 534.183, + "eval_steps_per_second": 8.439, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00046947883700000723, + "loss": 0.7627, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00043022278676644893, + "loss": 0.7585, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8240878582000732, + "eval_runtime": 6.9257, + "eval_samples_per_second": 520.959, + "eval_steps_per_second": 8.23, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0003911003146848392, + "loss": 0.7577, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8966186252771619, + "eval_loss": 0.8346340656280518, + "eval_runtime": 6.7243, + "eval_samples_per_second": 536.559, + "eval_steps_per_second": 8.477, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0003524123710483028, + "loss": 0.7511, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00031445656353238554, + "loss": 0.7377, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9027161862527716, + "eval_loss": 0.8260533213615417, + "eval_runtime": 6.8531, + "eval_samples_per_second": 526.474, + "eval_steps_per_second": 8.317, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.0002775248678428634, + "loss": 0.7321, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9093680709534369, + "eval_loss": 0.8221997022628784, + "eval_runtime": 6.8486, + "eval_samples_per_second": 526.82, + "eval_steps_per_second": 8.323, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.00024190138168757463, + "loss": 0.7397, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00020786013934989006, + "loss": 0.7311, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.811286449432373, + "eval_runtime": 7.0004, + "eval_samples_per_second": 515.402, + "eval_steps_per_second": 8.142, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.0001756630036752477, + "loss": 0.7268, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.00014555765168668586, + "loss": 0.7222, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.8159306645393372, + "eval_runtime": 6.6188, + "eval_samples_per_second": 545.114, + "eval_steps_per_second": 8.612, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 0.00011777566932505948, + "loss": 0.7157, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8122764229774475, + "eval_runtime": 7.0008, + "eval_samples_per_second": 515.372, + "eval_steps_per_second": 8.142, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 9.253076997018475e-05, + "loss": 0.7145, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 7.001715044696862e-05, + "loss": 0.7115, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.820366621017456, + "eval_runtime": 6.6102, + "eval_samples_per_second": 545.826, + "eval_steps_per_second": 8.623, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 5.040799716297464e-05, + "loss": 0.7098, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.3854153868987306e-05, + "loss": 0.7137, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8120188117027283, + "eval_runtime": 6.4977, + "eval_samples_per_second": 555.273, + "eval_steps_per_second": 8.772, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.0482961290850264e-05, + "loss": 0.707, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8137621283531189, + "eval_runtime": 6.9125, + "eval_samples_per_second": 521.954, + "eval_steps_per_second": 8.246, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.0397277558730457e-05, + "loss": 0.7006, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 3.6746869691726967e-06, + "loss": 0.7036, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8131329417228699, + "eval_runtime": 7.0203, + "eval_samples_per_second": 513.941, + "eval_steps_per_second": 8.119, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.000895174946719725, + "metric": "eval/loss", + "warmup_ratio": 0.2607123622313521 + } +} diff --git a/run-tdm55qka/checkpoint-1232/training_args.bin b/run-tdm55qka/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..825e8e9606fc5708a9ffd0dcfd3d6f82d328fb15 --- /dev/null +++ b/run-tdm55qka/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df992ac9eb363aaf2af9b9f9517b8e07a02d2d87b71fdda3d1b931962a2fe959 +size 4792 diff --git a/run-tdm55qka/checkpoint-1260/model.safetensors b/run-tdm55qka/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cee4ba3ec92e312f5dbf6f78767fcb550bea873f --- /dev/null +++ b/run-tdm55qka/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b7eff4cee2606407db3197e4bd668a37b642dc43d1d7169b2492d9a520275f2 +size 198025308 diff --git a/run-tdm55qka/checkpoint-1260/optimizer.pt b/run-tdm55qka/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d881375c454e5066c0e2985858cd901fb72cdc08 --- /dev/null +++ b/run-tdm55qka/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70bb25a07b91fb7aa6c0c3118cfcbe3ced063bc4865bb6f3bc18f9f2d6c9db7c +size 395900602 diff --git a/run-tdm55qka/checkpoint-1260/rng_state.pth b/run-tdm55qka/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-tdm55qka/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-tdm55qka/checkpoint-1260/scheduler.pt b/run-tdm55qka/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..db873caff4e274ac328a5570b97f9fa91ec4ff45 --- /dev/null +++ b/run-tdm55qka/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:370f44b01c218bdbfb124c84c722bda964757c499dae656f667bc9045f9ae594 +size 1064 diff --git a/run-tdm55qka/checkpoint-1260/trainer_state.json b/run-tdm55qka/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..96ea8d590a53370a3ddfa4cfd89b4012bdf554f7 --- /dev/null +++ b/run-tdm55qka/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9176829268292683, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-tdm55qka/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 7.074330885930957e-05, + "loss": 1.2131, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8902439024390244, + "eval_loss": 0.9553517699241638, + "eval_runtime": 6.7205, + "eval_samples_per_second": 536.864, + "eval_steps_per_second": 8.482, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00014148661771861914, + "loss": 0.8629, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00021222992657792873, + "loss": 0.8113, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8628048780487805, + "eval_loss": 0.9110397100448608, + "eval_runtime": 6.8183, + "eval_samples_per_second": 529.161, + "eval_steps_per_second": 8.36, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.0002829732354372383, + "loss": 0.7987, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9032705099778271, + "eval_loss": 0.835228681564331, + "eval_runtime": 6.8077, + "eval_samples_per_second": 529.986, + "eval_steps_per_second": 8.373, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00035371654429654784, + "loss": 0.7993, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00042445985315585745, + "loss": 0.7879, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6377494456762749, + "eval_loss": 1.2046802043914795, + "eval_runtime": 6.6938, + "eval_samples_per_second": 539.002, + "eval_steps_per_second": 8.515, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0004952031620151671, + "loss": 0.7941, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.0005659464708744766, + "loss": 0.7997, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9018847006651884, + "eval_loss": 0.8322978615760803, + "eval_runtime": 6.7732, + "eval_samples_per_second": 532.688, + "eval_steps_per_second": 8.416, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0006366897797337862, + "loss": 0.7972, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8553215077605322, + "eval_loss": 0.9102441072463989, + "eval_runtime": 6.9627, + "eval_samples_per_second": 518.19, + "eval_steps_per_second": 8.186, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0007074330885930957, + "loss": 0.8132, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0007781763974524053, + "loss": 0.82, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.7907427937915743, + "eval_loss": 0.9799911379814148, + "eval_runtime": 6.7596, + "eval_samples_per_second": 533.757, + "eval_steps_per_second": 8.432, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0008489197063117149, + "loss": 0.8365, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.0008949685513502196, + "loss": 0.8214, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8802660753880266, + "eval_loss": 0.8667421340942383, + "eval_runtime": 6.7798, + "eval_samples_per_second": 532.167, + "eval_steps_per_second": 8.407, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0008920569222601872, + "loss": 0.815, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8830376940133038, + "eval_loss": 0.8685916066169739, + "eval_runtime": 6.9552, + "eval_samples_per_second": 518.747, + "eval_steps_per_second": 8.195, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0008857262042472297, + "loss": 0.8304, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0008760250964691541, + "loss": 0.8286, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.858370288248337, + "eval_loss": 0.9249398708343506, + "eval_runtime": 6.9742, + "eval_samples_per_second": 517.333, + "eval_steps_per_second": 8.173, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0008630282248653272, + "loss": 0.8173, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.858370288248337, + "eval_loss": 0.9206698536872864, + "eval_runtime": 6.7811, + "eval_samples_per_second": 532.064, + "eval_steps_per_second": 8.406, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0008468355680953472, + "loss": 0.8171, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.0008275716884514749, + "loss": 0.8195, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8727827050997783, + "eval_loss": 0.8787004947662354, + "eval_runtime": 7.0001, + "eval_samples_per_second": 515.421, + "eval_steps_per_second": 8.143, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0008053847736610466, + "loss": 0.8162, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0007804454969498183, + "loss": 0.8188, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8910753880266076, + "eval_loss": 0.8523863554000854, + "eval_runtime": 6.7373, + "eval_samples_per_second": 535.526, + "eval_steps_per_second": 8.46, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0007529457041352247, + "loss": 0.8004, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.871119733924612, + "eval_loss": 0.8795638084411621, + "eval_runtime": 6.8513, + "eval_samples_per_second": 526.615, + "eval_steps_per_second": 8.32, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0007230969378491112, + "loss": 0.8106, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0006911288102423787, + "loss": 0.7925, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9007760532150776, + "eval_loss": 0.830922544002533, + "eval_runtime": 6.7362, + "eval_samples_per_second": 535.617, + "eval_steps_per_second": 8.462, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0006572872366895424, + "loss": 0.7981, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0006218325440804596, + "loss": 0.7874, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8747228381374723, + "eval_loss": 0.8838471174240112, + "eval_runtime": 6.887, + "eval_samples_per_second": 523.885, + "eval_steps_per_second": 8.276, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0005850374682512273, + "loss": 0.7818, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9060421286031042, + "eval_loss": 0.8282538652420044, + "eval_runtime": 6.5474, + "eval_samples_per_second": 551.062, + "eval_steps_per_second": 8.706, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.000547185055959046, + "loss": 0.7736, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0005085664875401447, + "loss": 0.7698, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9054878048780488, + "eval_loss": 0.8233076333999634, + "eval_runtime": 6.7542, + "eval_samples_per_second": 534.183, + "eval_steps_per_second": 8.439, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00046947883700000723, + "loss": 0.7627, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00043022278676644893, + "loss": 0.7585, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8240878582000732, + "eval_runtime": 6.9257, + "eval_samples_per_second": 520.959, + "eval_steps_per_second": 8.23, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.0003911003146848392, + "loss": 0.7577, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8966186252771619, + "eval_loss": 0.8346340656280518, + "eval_runtime": 6.7243, + "eval_samples_per_second": 536.559, + "eval_steps_per_second": 8.477, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0003524123710483028, + "loss": 0.7511, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00031445656353238554, + "loss": 0.7377, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9027161862527716, + "eval_loss": 0.8260533213615417, + "eval_runtime": 6.8531, + "eval_samples_per_second": 526.474, + "eval_steps_per_second": 8.317, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.0002775248678428634, + "loss": 0.7321, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9093680709534369, + "eval_loss": 0.8221997022628784, + "eval_runtime": 6.8486, + "eval_samples_per_second": 526.82, + "eval_steps_per_second": 8.323, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 0.00024190138168757463, + "loss": 0.7397, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 0.00020786013934989006, + "loss": 0.7311, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.914079822616408, + "eval_loss": 0.811286449432373, + "eval_runtime": 7.0004, + "eval_samples_per_second": 515.402, + "eval_steps_per_second": 8.142, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 0.0001756630036752477, + "loss": 0.7268, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 0.00014555765168668586, + "loss": 0.7222, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.8159306645393372, + "eval_runtime": 6.6188, + "eval_samples_per_second": 545.114, + "eval_steps_per_second": 8.612, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 0.00011777566932505948, + "loss": 0.7157, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9157427937915743, + "eval_loss": 0.8122764229774475, + "eval_runtime": 7.0008, + "eval_samples_per_second": 515.372, + "eval_steps_per_second": 8.142, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 9.253076997018475e-05, + "loss": 0.7145, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 7.001715044696862e-05, + "loss": 0.7115, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.820366621017456, + "eval_runtime": 6.6102, + "eval_samples_per_second": 545.826, + "eval_steps_per_second": 8.623, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 5.040799716297464e-05, + "loss": 0.7098, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.3854153868987306e-05, + "loss": 0.7137, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8120188117027283, + "eval_runtime": 6.4977, + "eval_samples_per_second": 555.273, + "eval_steps_per_second": 8.772, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 2.0482961290850264e-05, + "loss": 0.707, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9126940133037694, + "eval_loss": 0.8137621283531189, + "eval_runtime": 6.9125, + "eval_samples_per_second": 521.954, + "eval_steps_per_second": 8.246, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.0397277558730457e-05, + "loss": 0.7006, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 3.6746869691726967e-06, + "loss": 0.7036, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.8131329417228699, + "eval_runtime": 7.0203, + "eval_samples_per_second": 513.941, + "eval_steps_per_second": 8.119, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 3.6690316655681207e-07, + "loss": 0.7052, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8045616149902344, + "eval_runtime": 6.758, + "eval_samples_per_second": 533.883, + "eval_steps_per_second": 8.434, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.000895174946719725, + "metric": "eval/loss", + "warmup_ratio": 0.2607123622313521 + } +} diff --git a/run-tdm55qka/checkpoint-1260/training_args.bin b/run-tdm55qka/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..825e8e9606fc5708a9ffd0dcfd3d6f82d328fb15 --- /dev/null +++ b/run-tdm55qka/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df992ac9eb363aaf2af9b9f9517b8e07a02d2d87b71fdda3d1b931962a2fe959 +size 4792 diff --git a/run-u6wbkjwe/checkpoint-488/model.safetensors b/run-u6wbkjwe/checkpoint-488/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88392d4a9ef2736da1970866eea85704f19a922f --- /dev/null +++ b/run-u6wbkjwe/checkpoint-488/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:517770f5e7e416129a827b12b632027d4d7ba5ba9f76868a29af5decaf2ffa37 +size 198025308 diff --git a/run-u6wbkjwe/checkpoint-488/optimizer.pt b/run-u6wbkjwe/checkpoint-488/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..197e89e551fd5b0a0b57e54514ea952dbc831d56 --- /dev/null +++ b/run-u6wbkjwe/checkpoint-488/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5af0d231d153ef80a061b87ecbca0aa1ec4f83f052669ad0d10edc8cfa1fdff0 +size 395900602 diff --git a/run-u6wbkjwe/checkpoint-488/rng_state.pth b/run-u6wbkjwe/checkpoint-488/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f4e242cd45cd82eec2c9f5f7a343995f3fc07291 --- /dev/null +++ b/run-u6wbkjwe/checkpoint-488/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a77e9678cdc4e4bb78c6028d260e7fc1b894ddf6c0f054c0c0d7c6e8bb4722d +size 14244 diff --git a/run-u6wbkjwe/checkpoint-488/scheduler.pt b/run-u6wbkjwe/checkpoint-488/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dfbebdcd0c990e5c9203ea951bdae75dab8063aa --- /dev/null +++ b/run-u6wbkjwe/checkpoint-488/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c503314f9a54af6bc54e31fec2cc653c80cfc736702902f015257731a5e9aa9 +size 1064 diff --git a/run-u6wbkjwe/checkpoint-488/trainer_state.json b/run-u6wbkjwe/checkpoint-488/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a2bfd5da0d4204523821c6adadf01a8664e22e1a --- /dev/null +++ b/run-u6wbkjwe/checkpoint-488/trainer_state.json @@ -0,0 +1,526 @@ +{ + "best_metric": 0.916667914243952, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-u6wbkjwe/checkpoint-488", + "epoch": 22.96470588235294, + "eval_steps": 500, + "global_step": 488, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.588851627738463e-06, + "loss": 1.5165, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8098669623059866, + "eval_f1": 0.7471547751881076, + "eval_loss": 1.2938328981399536, + "eval_precision": 0.7126971638280679, + "eval_recall": 0.8098669623059866, + "eval_runtime": 8.2734, + "eval_samples_per_second": 436.099, + "eval_steps_per_second": 3.505, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 1.3177703255476927e-05, + "loss": 1.342, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 1.976655488321539e-05, + "loss": 1.0553, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.7513704434640557, + "eval_loss": 0.9643771052360535, + "eval_precision": 0.7699235349840203, + "eval_recall": 0.8287139689578714, + "eval_runtime": 7.9627, + "eval_samples_per_second": 453.115, + "eval_steps_per_second": 3.642, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 2.6355406510953853e-05, + "loss": 0.9449, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8819290465631929, + "eval_f1": 0.8562616118405861, + "eval_loss": 0.8870543837547302, + "eval_precision": 0.8749601720694202, + "eval_recall": 0.8819290465631929, + "eval_runtime": 7.9705, + "eval_samples_per_second": 452.668, + "eval_steps_per_second": 3.638, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 3.2944258138692315e-05, + "loss": 0.8949, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 3.953310976643078e-05, + "loss": 0.8529, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9057649667405765, + "eval_f1": 0.8980421843362844, + "eval_loss": 0.8291715383529663, + "eval_precision": 0.8976043535592082, + "eval_recall": 0.9057649667405765, + "eval_runtime": 7.883, + "eval_samples_per_second": 457.693, + "eval_steps_per_second": 3.679, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 4.612196139416924e-05, + "loss": 0.8305, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 4.811568744722445e-05, + "loss": 0.7985, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9021618625277162, + "eval_f1": 0.8979558974612086, + "eval_loss": 0.8199787735939026, + "eval_precision": 0.896134625164859, + "eval_recall": 0.9021618625277162, + "eval_runtime": 7.893, + "eval_samples_per_second": 457.112, + "eval_steps_per_second": 3.674, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 4.7948685337472274e-05, + "loss": 0.7965, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9108602896573584, + "eval_loss": 0.8020925521850586, + "eval_precision": 0.9068026657444742, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.7461, + "eval_samples_per_second": 465.783, + "eval_steps_per_second": 3.744, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 4.7642625924662004e-05, + "loss": 0.7891, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 4.719929189011405e-05, + "loss": 0.7848, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9050059081409935, + "eval_loss": 0.7995486259460449, + "eval_precision": 0.9010364863140257, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.1875, + "eval_samples_per_second": 440.674, + "eval_steps_per_second": 3.542, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 4.662126548834829e-05, + "loss": 0.7832, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 4.5911913506419795e-05, + "loss": 0.7715, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9095478499059848, + "eval_loss": 0.8039330244064331, + "eval_precision": 0.9074661236355314, + "eval_recall": 0.9171286031042128, + "eval_runtime": 8.0158, + "eval_samples_per_second": 450.113, + "eval_steps_per_second": 3.618, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 4.507536765364658e-05, + "loss": 0.7642, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9135414630011542, + "eval_loss": 0.7928462624549866, + "eval_precision": 0.9105810605907115, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.286, + "eval_samples_per_second": 495.196, + "eval_steps_per_second": 3.98, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 4.411650049595205e-05, + "loss": 0.7707, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 4.3040897074995526e-05, + "loss": 0.7615, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.913672614167341, + "eval_loss": 0.7933800220489502, + "eval_precision": 0.9095426071640471, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.5428, + "eval_samples_per_second": 478.34, + "eval_steps_per_second": 3.845, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 4.1854822377398706e-05, + "loss": 0.7507, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9054878048780488, + "eval_f1": 0.9059790261938773, + "eval_loss": 0.8176500201225281, + "eval_precision": 0.9091631224120729, + "eval_recall": 0.9054878048780488, + "eval_runtime": 7.8245, + "eval_samples_per_second": 461.113, + "eval_steps_per_second": 3.706, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 4.056518484354768e-05, + "loss": 0.7641, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 3.917949612851813e-05, + "loss": 0.7528, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9121823646596363, + "eval_loss": 0.8035166263580322, + "eval_precision": 0.9098749724533292, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.9574, + "eval_samples_per_second": 453.417, + "eval_steps_per_second": 3.644, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 3.770582734950096e-05, + "loss": 0.743, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 3.615276207457105e-05, + "loss": 0.7433, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.909062337314533, + "eval_loss": 0.8004091382026672, + "eval_precision": 0.90529474358906, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.7454, + "eval_samples_per_second": 465.822, + "eval_steps_per_second": 3.744, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 3.4529346326621665e-05, + "loss": 0.7399, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9095990245970162, + "eval_loss": 0.8006901741027832, + "eval_precision": 0.9060885739444526, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.5295, + "eval_samples_per_second": 479.184, + "eval_steps_per_second": 3.852, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 3.284503589367302e-05, + "loss": 0.7406, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 3.1109641252452495e-05, + "loss": 0.7405, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.909015321370784, + "eval_loss": 0.803378164768219, + "eval_precision": 0.9077039828046235, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.9226, + "eval_samples_per_second": 455.404, + "eval_steps_per_second": 3.66, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 2.933327042604627e-05, + "loss": 0.7383, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 2.7526270108454945e-05, + "loss": 0.7402, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9144099797256645, + "eval_loss": 0.7924289107322693, + "eval_precision": 0.9101820089771693, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.1772, + "eval_samples_per_second": 441.228, + "eval_steps_per_second": 3.546, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 2.5699165398980467e-05, + "loss": 0.7294, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9157622099121767, + "eval_loss": 0.7920668721199036, + "eval_precision": 0.9120321832808235, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.7326, + "eval_samples_per_second": 466.593, + "eval_steps_per_second": 3.75, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 2.3862598497469262e-05, + "loss": 0.7355, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 2.202726671748842e-05, + "loss": 0.7289, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9114570603745261, + "eval_loss": 0.7972142696380615, + "eval_precision": 0.9074244599640218, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.4532, + "eval_samples_per_second": 484.087, + "eval_steps_per_second": 3.891, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 2.0203860178485247e-05, + "loss": 0.7264, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 1.8402999539849805e-05, + "loss": 0.7271, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9123865641208007, + "eval_loss": 0.8010328412055969, + "eval_precision": 0.909502284395731, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.9779, + "eval_samples_per_second": 452.249, + "eval_steps_per_second": 3.635, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 1.6635174139556273e-05, + "loss": 0.7257, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.899390243902439, + "eval_f1": 0.9000402456989192, + "eval_loss": 0.8260993361473083, + "eval_precision": 0.9030497908525028, + "eval_recall": 0.899390243902439, + "eval_runtime": 8.0746, + "eval_samples_per_second": 446.831, + "eval_steps_per_second": 3.591, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 1.4910680897702282e-05, + "loss": 0.7252, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 1.323956434081028e-05, + "loss": 0.7279, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9151247824019927, + "eval_loss": 0.7955822348594666, + "eval_precision": 0.9126977349425348, + "eval_recall": 0.9187915742793792, + "eval_runtime": 8.0262, + "eval_samples_per_second": 449.527, + "eval_steps_per_second": 3.613, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 1.1631558096226762e-05, + "loss": 0.725, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9112514316503881, + "eval_loss": 0.8032011389732361, + "eval_precision": 0.9073527698521755, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.8958, + "eval_samples_per_second": 456.953, + "eval_steps_per_second": 3.673, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 1.0096028197392727e-05, + "loss": 0.7232, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 8.64191853021071e-06, + "loss": 0.7221, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.916667914243952, + "eval_loss": 0.7959029078483582, + "eval_precision": 0.9191716007013535, + "eval_recall": 0.9204545454545454, + "eval_runtime": 7.8889, + "eval_samples_per_second": 457.35, + "eval_steps_per_second": 3.676, + "step": 488 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.42411198050154514, + "learning_rate": 4.81493003565503e-05, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-u6wbkjwe/checkpoint-488/training_args.bin b/run-u6wbkjwe/checkpoint-488/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2762254fc0ef60d7655c466e52d0037b216ae4de --- /dev/null +++ b/run-u6wbkjwe/checkpoint-488/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70f9139bf4d632d69113cf798dbc98c7550bdef4ed21d13104b067256bc646cc +size 4792 diff --git a/run-u6wbkjwe/checkpoint-630/model.safetensors b/run-u6wbkjwe/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1c490adc8011a8b11de6d646ed67bfddc8994cd --- /dev/null +++ b/run-u6wbkjwe/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a736f1f0cdc54144b9492e868f5e8a78f2d1cc81c0aac0773cb1b4a098b9364 +size 198025308 diff --git a/run-u6wbkjwe/checkpoint-630/optimizer.pt b/run-u6wbkjwe/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..eaace9a3d307086c805ea5cd9cd3bd8a001718fe --- /dev/null +++ b/run-u6wbkjwe/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80b8a5ff8898800da9ffb4febf7126341a5d2f785bf6d319690a1f4d39d0f114 +size 395900602 diff --git a/run-u6wbkjwe/checkpoint-630/rng_state.pth b/run-u6wbkjwe/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-u6wbkjwe/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-u6wbkjwe/checkpoint-630/scheduler.pt b/run-u6wbkjwe/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..440a2af588f7a33b82b15904afdf38b3c9b1e94c --- /dev/null +++ b/run-u6wbkjwe/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6f7606995c9d3c6616e852348796a7ee536e4f79ff08989e5107d561329e0d6 +size 1064 diff --git a/run-u6wbkjwe/checkpoint-630/trainer_state.json b/run-u6wbkjwe/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..934c4480c5434433b44a56f820c6474cbb83aadd --- /dev/null +++ b/run-u6wbkjwe/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.916667914243952, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-u6wbkjwe/checkpoint-488", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.588851627738463e-06, + "loss": 1.5165, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8098669623059866, + "eval_f1": 0.7471547751881076, + "eval_loss": 1.2938328981399536, + "eval_precision": 0.7126971638280679, + "eval_recall": 0.8098669623059866, + "eval_runtime": 8.2734, + "eval_samples_per_second": 436.099, + "eval_steps_per_second": 3.505, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 1.3177703255476927e-05, + "loss": 1.342, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 1.976655488321539e-05, + "loss": 1.0553, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.7513704434640557, + "eval_loss": 0.9643771052360535, + "eval_precision": 0.7699235349840203, + "eval_recall": 0.8287139689578714, + "eval_runtime": 7.9627, + "eval_samples_per_second": 453.115, + "eval_steps_per_second": 3.642, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 2.6355406510953853e-05, + "loss": 0.9449, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8819290465631929, + "eval_f1": 0.8562616118405861, + "eval_loss": 0.8870543837547302, + "eval_precision": 0.8749601720694202, + "eval_recall": 0.8819290465631929, + "eval_runtime": 7.9705, + "eval_samples_per_second": 452.668, + "eval_steps_per_second": 3.638, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 3.2944258138692315e-05, + "loss": 0.8949, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 3.953310976643078e-05, + "loss": 0.8529, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9057649667405765, + "eval_f1": 0.8980421843362844, + "eval_loss": 0.8291715383529663, + "eval_precision": 0.8976043535592082, + "eval_recall": 0.9057649667405765, + "eval_runtime": 7.883, + "eval_samples_per_second": 457.693, + "eval_steps_per_second": 3.679, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 4.612196139416924e-05, + "loss": 0.8305, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 4.811568744722445e-05, + "loss": 0.7985, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9021618625277162, + "eval_f1": 0.8979558974612086, + "eval_loss": 0.8199787735939026, + "eval_precision": 0.896134625164859, + "eval_recall": 0.9021618625277162, + "eval_runtime": 7.893, + "eval_samples_per_second": 457.112, + "eval_steps_per_second": 3.674, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 4.7948685337472274e-05, + "loss": 0.7965, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.9108602896573584, + "eval_loss": 0.8020925521850586, + "eval_precision": 0.9068026657444742, + "eval_recall": 0.9154656319290465, + "eval_runtime": 7.7461, + "eval_samples_per_second": 465.783, + "eval_steps_per_second": 3.744, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 4.7642625924662004e-05, + "loss": 0.7891, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 4.719929189011405e-05, + "loss": 0.7848, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9050059081409935, + "eval_loss": 0.7995486259460449, + "eval_precision": 0.9010364863140257, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.1875, + "eval_samples_per_second": 440.674, + "eval_steps_per_second": 3.542, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 4.662126548834829e-05, + "loss": 0.7832, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 4.5911913506419795e-05, + "loss": 0.7715, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9095478499059848, + "eval_loss": 0.8039330244064331, + "eval_precision": 0.9074661236355314, + "eval_recall": 0.9171286031042128, + "eval_runtime": 8.0158, + "eval_samples_per_second": 450.113, + "eval_steps_per_second": 3.618, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 4.507536765364658e-05, + "loss": 0.7642, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9135414630011542, + "eval_loss": 0.7928462624549866, + "eval_precision": 0.9105810605907115, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.286, + "eval_samples_per_second": 495.196, + "eval_steps_per_second": 3.98, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 4.411650049595205e-05, + "loss": 0.7707, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 4.3040897074995526e-05, + "loss": 0.7615, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.913672614167341, + "eval_loss": 0.7933800220489502, + "eval_precision": 0.9095426071640471, + "eval_recall": 0.9187915742793792, + "eval_runtime": 7.5428, + "eval_samples_per_second": 478.34, + "eval_steps_per_second": 3.845, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 4.1854822377398706e-05, + "loss": 0.7507, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9054878048780488, + "eval_f1": 0.9059790261938773, + "eval_loss": 0.8176500201225281, + "eval_precision": 0.9091631224120729, + "eval_recall": 0.9054878048780488, + "eval_runtime": 7.8245, + "eval_samples_per_second": 461.113, + "eval_steps_per_second": 3.706, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 4.056518484354768e-05, + "loss": 0.7641, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 3.917949612851813e-05, + "loss": 0.7528, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9121823646596363, + "eval_loss": 0.8035166263580322, + "eval_precision": 0.9098749724533292, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.9574, + "eval_samples_per_second": 453.417, + "eval_steps_per_second": 3.644, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 3.770582734950096e-05, + "loss": 0.743, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 3.615276207457105e-05, + "loss": 0.7433, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.909062337314533, + "eval_loss": 0.8004091382026672, + "eval_precision": 0.90529474358906, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.7454, + "eval_samples_per_second": 465.822, + "eval_steps_per_second": 3.744, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 3.4529346326621665e-05, + "loss": 0.7399, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.9095990245970162, + "eval_loss": 0.8006901741027832, + "eval_precision": 0.9060885739444526, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.5295, + "eval_samples_per_second": 479.184, + "eval_steps_per_second": 3.852, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 3.284503589367302e-05, + "loss": 0.7406, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 3.1109641252452495e-05, + "loss": 0.7405, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9165742793791575, + "eval_f1": 0.909015321370784, + "eval_loss": 0.803378164768219, + "eval_precision": 0.9077039828046235, + "eval_recall": 0.9165742793791575, + "eval_runtime": 7.9226, + "eval_samples_per_second": 455.404, + "eval_steps_per_second": 3.66, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 2.933327042604627e-05, + "loss": 0.7383, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 2.7526270108454945e-05, + "loss": 0.7402, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9144099797256645, + "eval_loss": 0.7924289107322693, + "eval_precision": 0.9101820089771693, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.1772, + "eval_samples_per_second": 441.228, + "eval_steps_per_second": 3.546, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 2.5699165398980467e-05, + "loss": 0.7294, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9157622099121767, + "eval_loss": 0.7920668721199036, + "eval_precision": 0.9120321832808235, + "eval_recall": 0.9201773835920177, + "eval_runtime": 7.7326, + "eval_samples_per_second": 466.593, + "eval_steps_per_second": 3.75, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 2.3862598497469262e-05, + "loss": 0.7355, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 2.202726671748842e-05, + "loss": 0.7289, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9114570603745261, + "eval_loss": 0.7972142696380615, + "eval_precision": 0.9074244599640218, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.4532, + "eval_samples_per_second": 484.087, + "eval_steps_per_second": 3.891, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 2.0203860178485247e-05, + "loss": 0.7264, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 1.8402999539849805e-05, + "loss": 0.7271, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9123865641208007, + "eval_loss": 0.8010328412055969, + "eval_precision": 0.909502284395731, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.9779, + "eval_samples_per_second": 452.249, + "eval_steps_per_second": 3.635, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 1.6635174139556273e-05, + "loss": 0.7257, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.899390243902439, + "eval_f1": 0.9000402456989192, + "eval_loss": 0.8260993361473083, + "eval_precision": 0.9030497908525028, + "eval_recall": 0.899390243902439, + "eval_runtime": 8.0746, + "eval_samples_per_second": 446.831, + "eval_steps_per_second": 3.591, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 1.4910680897702282e-05, + "loss": 0.7252, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 1.323956434081028e-05, + "loss": 0.7279, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9151247824019927, + "eval_loss": 0.7955822348594666, + "eval_precision": 0.9126977349425348, + "eval_recall": 0.9187915742793792, + "eval_runtime": 8.0262, + "eval_samples_per_second": 449.527, + "eval_steps_per_second": 3.613, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 1.1631558096226762e-05, + "loss": 0.725, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9112514316503881, + "eval_loss": 0.8032011389732361, + "eval_precision": 0.9073527698521755, + "eval_recall": 0.9162971175166297, + "eval_runtime": 7.8958, + "eval_samples_per_second": 456.953, + "eval_steps_per_second": 3.673, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 1.0096028197392727e-05, + "loss": 0.7232, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 8.64191853021071e-06, + "loss": 0.7221, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.916667914243952, + "eval_loss": 0.7959029078483582, + "eval_precision": 0.9191716007013535, + "eval_recall": 0.9204545454545454, + "eval_runtime": 7.8889, + "eval_samples_per_second": 457.35, + "eval_steps_per_second": 3.676, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 7.277698738262973e-06, + "loss": 0.7206, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 6.011314890313422e-06, + "loss": 0.7214, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9132531564687905, + "eval_loss": 0.7980147004127502, + "eval_precision": 0.909743445049866, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.2289, + "eval_samples_per_second": 438.454, + "eval_steps_per_second": 3.524, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 4.850143197436478e-06, + "loss": 0.7225, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9145066268653473, + "eval_loss": 0.7983312606811523, + "eval_precision": 0.9106889439264879, + "eval_recall": 0.9190687361419069, + "eval_runtime": 7.972, + "eval_samples_per_second": 452.583, + "eval_steps_per_second": 3.638, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 3.800947049353307e-06, + "loss": 0.7134, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 2.8698376202227274e-06, + "loss": 0.7213, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9149923754029029, + "eval_loss": 0.797085702419281, + "eval_precision": 0.9115542263523717, + "eval_recall": 0.9190687361419069, + "eval_runtime": 8.312, + "eval_samples_per_second": 434.069, + "eval_steps_per_second": 3.489, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 2.062238273343135e-06, + "loss": 0.7211, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 1.382852972094807e-06, + "loss": 0.7179, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9154656319290465, + "eval_f1": 0.91239085648075, + "eval_loss": 0.8052763342857361, + "eval_precision": 0.9095224978425112, + "eval_recall": 0.9154656319290465, + "eval_runtime": 8.1473, + "eval_samples_per_second": 442.845, + "eval_steps_per_second": 3.559, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 8.356388811170274e-07, + "loss": 0.7191, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9144597390138742, + "eval_loss": 0.7976669073104858, + "eval_precision": 0.9108557647426188, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.7361, + "eval_samples_per_second": 466.386, + "eval_steps_per_second": 3.749, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 4.2378331730781496e-07, + "loss": 0.7204, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 1.4968518489809753e-07, + "loss": 0.7209, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9159395578353199, + "eval_loss": 0.7945273518562317, + "eval_precision": 0.9120756221353422, + "eval_recall": 0.9204545454545454, + "eval_runtime": 7.9976, + "eval_samples_per_second": 451.138, + "eval_steps_per_second": 3.626, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 1.4941002734064387e-08, + "loss": 0.7178, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9128638454603807, + "eval_loss": 0.7995331287384033, + "eval_precision": 0.9124415092179013, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.396, + "eval_samples_per_second": 487.83, + "eval_steps_per_second": 3.921, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.42411198050154514, + "learning_rate": 4.81493003565503e-05, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-u6wbkjwe/checkpoint-630/training_args.bin b/run-u6wbkjwe/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2762254fc0ef60d7655c466e52d0037b216ae4de --- /dev/null +++ b/run-u6wbkjwe/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70f9139bf4d632d69113cf798dbc98c7550bdef4ed21d13104b067256bc646cc +size 4792 diff --git a/run-v000uvwr/checkpoint-850/model.safetensors b/run-v000uvwr/checkpoint-850/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee8771808acd573b80e56e2888adc557e8af03f2 --- /dev/null +++ b/run-v000uvwr/checkpoint-850/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb4b18c839e74deaab65b3de2af96010dd04e444ce247884f542f5efec317037 +size 198025308 diff --git a/run-v000uvwr/checkpoint-850/optimizer.pt b/run-v000uvwr/checkpoint-850/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4c5fa7039356c12cc635ec09693924746b4806c --- /dev/null +++ b/run-v000uvwr/checkpoint-850/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a42435ddbc5e399c07dec403a3daeba1251d8880ecfb1365cbeeb06d9edb296f +size 395900602 diff --git a/run-v000uvwr/checkpoint-850/rng_state.pth b/run-v000uvwr/checkpoint-850/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..daf48dc92826e538a672c14823a0ff1f4a801a3e --- /dev/null +++ b/run-v000uvwr/checkpoint-850/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:973bfbda0b97e42f54d9eaaeedb43e787bc5c6295165453488a490f8ea219873 +size 14244 diff --git a/run-v000uvwr/checkpoint-850/scheduler.pt b/run-v000uvwr/checkpoint-850/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e260bd997fc4bf92b3f2f0e2b05c3c2ce43abd7 --- /dev/null +++ b/run-v000uvwr/checkpoint-850/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb3e9f716fb16e7be2b5cd700b3a402bea3048bfb0a58ae3d900de4c4603aaa5 +size 1064 diff --git a/run-v000uvwr/checkpoint-850/trainer_state.json b/run-v000uvwr/checkpoint-850/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a83879fc045f3580979ae8befe41ec00ffa2a76b --- /dev/null +++ b/run-v000uvwr/checkpoint-850/trainer_state.json @@ -0,0 +1,399 @@ +{ + "best_metric": 0.9138026607538803, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-v000uvwr/checkpoint-85", + "epoch": 20.0, + "eval_steps": 500, + "global_step": 850, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.477787002617446e-05, + "loss": 1.2936, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8697339246119734, + "eval_loss": 0.9165277481079102, + "eval_runtime": 6.9203, + "eval_samples_per_second": 521.365, + "eval_steps_per_second": 8.237, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 6.955574005234892e-05, + "loss": 0.9145, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00010433361007852338, + "loss": 0.8385, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.808893084526062, + "eval_runtime": 6.7262, + "eval_samples_per_second": 536.413, + "eval_steps_per_second": 8.474, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00013911148010469784, + "loss": 0.8, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8072550892829895, + "eval_runtime": 6.9392, + "eval_samples_per_second": 519.944, + "eval_steps_per_second": 8.214, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001738893501308723, + "loss": 0.7991, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00020866722015704675, + "loss": 0.7826, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9065964523281597, + "eval_loss": 0.8211786150932312, + "eval_runtime": 6.7367, + "eval_samples_per_second": 535.57, + "eval_steps_per_second": 8.461, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00024344509018322123, + "loss": 0.783, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00027822296020939567, + "loss": 0.7784, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8694567627494457, + "eval_loss": 0.8681292533874512, + "eval_runtime": 6.6543, + "eval_samples_per_second": 542.207, + "eval_steps_per_second": 8.566, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00031300083023557014, + "loss": 0.7812, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8791574279379157, + "eval_loss": 0.8830167651176453, + "eval_runtime": 7.0137, + "eval_samples_per_second": 514.425, + "eval_steps_per_second": 8.127, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0003477787002617446, + "loss": 0.7731, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00038255657028791904, + "loss": 0.7768, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8555986696230599, + "eval_loss": 0.9214200377464294, + "eval_runtime": 6.8345, + "eval_samples_per_second": 527.91, + "eval_steps_per_second": 8.34, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0004173344403140935, + "loss": 0.7804, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.000452112310340268, + "loss": 0.7732, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8810975609756098, + "eval_loss": 0.8661342859268188, + "eval_runtime": 6.7571, + "eval_samples_per_second": 533.956, + "eval_steps_per_second": 8.436, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0004706359751687423, + "loss": 0.7763, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.7117516629711752, + "eval_loss": 1.0528497695922852, + "eval_runtime": 7.2388, + "eval_samples_per_second": 498.428, + "eval_steps_per_second": 7.874, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00046880705154036395, + "loss": 0.7827, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0004650907464952267, + "loss": 0.7716, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8869179600886918, + "eval_loss": 0.8526727557182312, + "eval_runtime": 6.99, + "eval_samples_per_second": 516.166, + "eval_steps_per_second": 8.155, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00045951711340535965, + "loss": 0.7604, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8849778270509978, + "eval_loss": 0.8628765344619751, + "eval_runtime": 6.7405, + "eval_samples_per_second": 535.27, + "eval_steps_per_second": 8.456, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00045213122566134024, + "loss": 0.7723, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00044299281216847664, + "loss": 0.7563, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.832039911308204, + "eval_loss": 0.9178381562232971, + "eval_runtime": 6.8518, + "eval_samples_per_second": 526.573, + "eval_steps_per_second": 8.319, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00043217577432536834, + "loss": 0.7562, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0004197675883909889, + "loss": 0.7486, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8946784922394678, + "eval_loss": 0.8446255922317505, + "eval_runtime": 7.1664, + "eval_samples_per_second": 503.463, + "eval_steps_per_second": 7.954, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.000405868598073286, + "loss": 0.7545, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8899667405764967, + "eval_loss": 0.840202271938324, + "eval_runtime": 6.6424, + "eval_samples_per_second": 543.18, + "eval_steps_per_second": 8.581, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00039059120306006114, + "loss": 0.7484, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003740589500543954, + "loss": 0.7354, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9049334811529933, + "eval_loss": 0.8186755180358887, + "eval_runtime": 6.7169, + "eval_samples_per_second": 537.152, + "eval_steps_per_second": 8.486, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0003564055336653228, + "loss": 0.7346, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00033777371523344477, + "loss": 0.742, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9088137472283814, + "eval_loss": 0.8185840249061584, + "eval_runtime": 6.7593, + "eval_samples_per_second": 533.785, + "eval_steps_per_second": 8.433, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0003183141683348303, + "loss": 0.7361, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9041019955654102, + "eval_loss": 0.8267068266868591, + "eval_runtime": 7.1176, + "eval_samples_per_second": 506.913, + "eval_steps_per_second": 8.008, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00029818426029949176, + "loss": 0.7285, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0002775467795981688, + "loss": 0.7223, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8227972388267517, + "eval_runtime": 7.1078, + "eval_samples_per_second": 507.612, + "eval_steps_per_second": 8.019, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0002565686193889123, + "loss": 0.7193, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00023541942786948865, + "loss": 0.7143, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.8155473470687866, + "eval_runtime": 6.92, + "eval_samples_per_second": 521.386, + "eval_steps_per_second": 8.237, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.000214270236350065, + "loss": 0.71, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9088137472283814, + "eval_loss": 0.8187105059623718, + "eval_runtime": 6.7617, + "eval_samples_per_second": 533.595, + "eval_steps_per_second": 8.43, + "step": 850 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0004708388557389773, + "metric": "eval/loss", + "warmup_ratio": 0.2792628600083835 + } +} diff --git a/run-v000uvwr/checkpoint-850/training_args.bin b/run-v000uvwr/checkpoint-850/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b062be20b5d8973d7e165308060c33be7fa722f6 --- /dev/null +++ b/run-v000uvwr/checkpoint-850/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee75459f5654ce3be5961b0875053ff71d8046e30b81c8719d03f8bc6a1cdcd3 +size 4792 diff --git a/run-v000uvwr/checkpoint-892/model.safetensors b/run-v000uvwr/checkpoint-892/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..30478994e05f0fdb5ac6e52174ed341b55eff3ac --- /dev/null +++ b/run-v000uvwr/checkpoint-892/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3f918cde4a9a321cb1f9082195202a6beb598c6fd2ef9246a164c66c2b6d0e1 +size 198025308 diff --git a/run-v000uvwr/checkpoint-892/optimizer.pt b/run-v000uvwr/checkpoint-892/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1924467866a423d633cdbd488d985e17237dc6b5 --- /dev/null +++ b/run-v000uvwr/checkpoint-892/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20631957d427f4b935a3107f50dcf4d53eed14df7c437c23d005be5df2c7870 +size 395900602 diff --git a/run-v000uvwr/checkpoint-892/rng_state.pth b/run-v000uvwr/checkpoint-892/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..046a67863637b18e432c309fc608e1de12a4f4ad --- /dev/null +++ b/run-v000uvwr/checkpoint-892/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a37b95cff6f5c13474d42738fa34d6f65f2efc96633b3b5733ebee5529adb94d +size 14244 diff --git a/run-v000uvwr/checkpoint-892/scheduler.pt b/run-v000uvwr/checkpoint-892/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..67136f7add42f847473b59aab4157effca22f944 --- /dev/null +++ b/run-v000uvwr/checkpoint-892/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e4627decf0f1b736cddaac6558c694df37d351aa41d90890bd7f4e6b4a0da98 +size 1064 diff --git a/run-v000uvwr/checkpoint-892/trainer_state.json b/run-v000uvwr/checkpoint-892/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a5ee2d0b33dc0a59e3367a5962cc56b8e484d9d6 --- /dev/null +++ b/run-v000uvwr/checkpoint-892/trainer_state.json @@ -0,0 +1,420 @@ +{ + "best_metric": 0.9143569844789357, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-v000uvwr/checkpoint-892", + "epoch": 20.988235294117647, + "eval_steps": 500, + "global_step": 892, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 3.477787002617446e-05, + "loss": 1.2936, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8697339246119734, + "eval_loss": 0.9165277481079102, + "eval_runtime": 6.9203, + "eval_samples_per_second": 521.365, + "eval_steps_per_second": 8.237, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 6.955574005234892e-05, + "loss": 0.9145, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.00010433361007852338, + "loss": 0.8385, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.808893084526062, + "eval_runtime": 6.7262, + "eval_samples_per_second": 536.413, + "eval_steps_per_second": 8.474, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00013911148010469784, + "loss": 0.8, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8072550892829895, + "eval_runtime": 6.9392, + "eval_samples_per_second": 519.944, + "eval_steps_per_second": 8.214, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001738893501308723, + "loss": 0.7991, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00020866722015704675, + "loss": 0.7826, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9065964523281597, + "eval_loss": 0.8211786150932312, + "eval_runtime": 6.7367, + "eval_samples_per_second": 535.57, + "eval_steps_per_second": 8.461, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00024344509018322123, + "loss": 0.783, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00027822296020939567, + "loss": 0.7784, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8694567627494457, + "eval_loss": 0.8681292533874512, + "eval_runtime": 6.6543, + "eval_samples_per_second": 542.207, + "eval_steps_per_second": 8.566, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00031300083023557014, + "loss": 0.7812, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8791574279379157, + "eval_loss": 0.8830167651176453, + "eval_runtime": 7.0137, + "eval_samples_per_second": 514.425, + "eval_steps_per_second": 8.127, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.0003477787002617446, + "loss": 0.7731, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00038255657028791904, + "loss": 0.7768, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8555986696230599, + "eval_loss": 0.9214200377464294, + "eval_runtime": 6.8345, + "eval_samples_per_second": 527.91, + "eval_steps_per_second": 8.34, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0004173344403140935, + "loss": 0.7804, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.000452112310340268, + "loss": 0.7732, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8810975609756098, + "eval_loss": 0.8661342859268188, + "eval_runtime": 6.7571, + "eval_samples_per_second": 533.956, + "eval_steps_per_second": 8.436, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.0004706359751687423, + "loss": 0.7763, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.7117516629711752, + "eval_loss": 1.0528497695922852, + "eval_runtime": 7.2388, + "eval_samples_per_second": 498.428, + "eval_steps_per_second": 7.874, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00046880705154036395, + "loss": 0.7827, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.0004650907464952267, + "loss": 0.7716, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8869179600886918, + "eval_loss": 0.8526727557182312, + "eval_runtime": 6.99, + "eval_samples_per_second": 516.166, + "eval_steps_per_second": 8.155, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00045951711340535965, + "loss": 0.7604, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8849778270509978, + "eval_loss": 0.8628765344619751, + "eval_runtime": 6.7405, + "eval_samples_per_second": 535.27, + "eval_steps_per_second": 8.456, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00045213122566134024, + "loss": 0.7723, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00044299281216847664, + "loss": 0.7563, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.832039911308204, + "eval_loss": 0.9178381562232971, + "eval_runtime": 6.8518, + "eval_samples_per_second": 526.573, + "eval_steps_per_second": 8.319, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.00043217577432536834, + "loss": 0.7562, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.0004197675883909889, + "loss": 0.7486, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8946784922394678, + "eval_loss": 0.8446255922317505, + "eval_runtime": 7.1664, + "eval_samples_per_second": 503.463, + "eval_steps_per_second": 7.954, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.000405868598073286, + "loss": 0.7545, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8899667405764967, + "eval_loss": 0.840202271938324, + "eval_runtime": 6.6424, + "eval_samples_per_second": 543.18, + "eval_steps_per_second": 8.581, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00039059120306006114, + "loss": 0.7484, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.0003740589500543954, + "loss": 0.7354, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9049334811529933, + "eval_loss": 0.8186755180358887, + "eval_runtime": 6.7169, + "eval_samples_per_second": 537.152, + "eval_steps_per_second": 8.486, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.0003564055336653228, + "loss": 0.7346, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.00033777371523344477, + "loss": 0.742, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9088137472283814, + "eval_loss": 0.8185840249061584, + "eval_runtime": 6.7593, + "eval_samples_per_second": 533.785, + "eval_steps_per_second": 8.433, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.0003183141683348303, + "loss": 0.7361, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9041019955654102, + "eval_loss": 0.8267068266868591, + "eval_runtime": 7.1176, + "eval_samples_per_second": 506.913, + "eval_steps_per_second": 8.008, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00029818426029949176, + "loss": 0.7285, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0002775467795981688, + "loss": 0.7223, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9110310421286031, + "eval_loss": 0.8227972388267517, + "eval_runtime": 7.1078, + "eval_samples_per_second": 507.612, + "eval_steps_per_second": 8.019, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.0002565686193889123, + "loss": 0.7193, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00023541942786948865, + "loss": 0.7143, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.8155473470687866, + "eval_runtime": 6.92, + "eval_samples_per_second": 521.386, + "eval_steps_per_second": 8.237, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.000214270236350065, + "loss": 0.71, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9088137472283814, + "eval_loss": 0.8187105059623718, + "eval_runtime": 6.7617, + "eval_samples_per_second": 533.595, + "eval_steps_per_second": 8.43, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00019329207614080852, + "loss": 0.7112, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00017265459543948556, + "loss": 0.7055, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9143569844789357, + "eval_loss": 0.8171006441116333, + "eval_runtime": 7.0036, + "eval_samples_per_second": 515.162, + "eval_steps_per_second": 8.139, + "step": 892 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0004708388557389773, + "metric": "eval/loss", + "warmup_ratio": 0.2792628600083835 + } +} diff --git a/run-v000uvwr/checkpoint-892/training_args.bin b/run-v000uvwr/checkpoint-892/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b062be20b5d8973d7e165308060c33be7fa722f6 --- /dev/null +++ b/run-v000uvwr/checkpoint-892/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee75459f5654ce3be5961b0875053ff71d8046e30b81c8719d03f8bc6a1cdcd3 +size 4792 diff --git a/run-vrdj6xlx/checkpoint-616/model.safetensors b/run-vrdj6xlx/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..be65c262336246b0102dadf2e24454d176f1132e --- /dev/null +++ b/run-vrdj6xlx/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32faac481b40c0ebc6e0a1d929ba7da8f18c51f369a18c613730193f78fd74bb +size 198025308 diff --git a/run-vrdj6xlx/checkpoint-616/optimizer.pt b/run-vrdj6xlx/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..987ded4252b017268a7322a4a1aefac8488f533e --- /dev/null +++ b/run-vrdj6xlx/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:174f2bf2a4534c34ea79325b9705d78acbd67c4f748e621b34b978374ec5e35a +size 395900602 diff --git a/run-vrdj6xlx/checkpoint-616/rng_state.pth b/run-vrdj6xlx/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-vrdj6xlx/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-vrdj6xlx/checkpoint-616/scheduler.pt b/run-vrdj6xlx/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..120978b0071aa52e4fb2ac94d3c75cc5b0bb7a59 --- /dev/null +++ b/run-vrdj6xlx/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:217f30a34a0380ff2c64870a0a7f0bda90837fe394ce9331aa800078ab8a9d4b +size 1064 diff --git a/run-vrdj6xlx/checkpoint-616/trainer_state.json b/run-vrdj6xlx/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..396c8211dc92a1dae170051a9dc51b8c54469900 --- /dev/null +++ b/run-vrdj6xlx/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9139898150700022, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-vrdj6xlx/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 0.00011174488052831688, + "loss": 1.2677, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8292682926829268, + "eval_f1": 0.7526922431410414, + "eval_loss": 1.0391713380813599, + "eval_precision": 0.7703043936882441, + "eval_recall": 0.8292682926829268, + "eval_runtime": 7.9747, + "eval_samples_per_second": 452.43, + "eval_steps_per_second": 3.636, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00022348976105663376, + "loss": 0.9196, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00033523464158495065, + "loss": 0.8196, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.9013303769401331, + "eval_f1": 0.8899439663307231, + "eval_loss": 0.8275727033615112, + "eval_precision": 0.8917034066849797, + "eval_recall": 0.9013303769401331, + "eval_runtime": 8.0612, + "eval_samples_per_second": 447.574, + "eval_steps_per_second": 3.597, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.0004469795221132675, + "loss": 0.8006, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8076496674057649, + "eval_f1": 0.8286124493691536, + "eval_loss": 0.9636921882629395, + "eval_precision": 0.8751698676836334, + "eval_recall": 0.8076496674057649, + "eval_runtime": 7.9458, + "eval_samples_per_second": 454.078, + "eval_steps_per_second": 3.65, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0005587244026415844, + "loss": 0.7921, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.0006704692831699013, + "loss": 0.7854, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6549334811529933, + "eval_f1": 0.7105334225104484, + "eval_loss": 1.1556798219680786, + "eval_precision": 0.8589335112346032, + "eval_recall": 0.6549334811529933, + "eval_runtime": 8.0017, + "eval_samples_per_second": 450.906, + "eval_steps_per_second": 3.624, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0007822141636982181, + "loss": 0.8045, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0008160271393413373, + "loss": 0.7862, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8738913525498891, + "eval_f1": 0.8666497036916437, + "eval_loss": 0.8896118998527527, + "eval_precision": 0.8756908718454293, + "eval_recall": 0.8738913525498891, + "eval_runtime": 8.026, + "eval_samples_per_second": 449.541, + "eval_steps_per_second": 3.613, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0008131948353441326, + "loss": 0.8024, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8458980044345898, + "eval_f1": 0.808545140802328, + "eval_loss": 0.9162513613700867, + "eval_precision": 0.8260174016257357, + "eval_recall": 0.8458980044345898, + "eval_runtime": 7.9211, + "eval_samples_per_second": 455.494, + "eval_steps_per_second": 3.661, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0008080041626060989, + "loss": 0.8018, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.000800485354849655, + "loss": 0.796, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8384146341463414, + "eval_f1": 0.8293850340041232, + "eval_loss": 0.952700138092041, + "eval_precision": 0.8629293120237446, + "eval_recall": 0.8384146341463414, + "eval_runtime": 8.0794, + "eval_samples_per_second": 446.57, + "eval_steps_per_second": 3.589, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0007906822063107499, + "loss": 0.7988, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0007786518166538603, + "loss": 0.7819, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8497782705099778, + "eval_f1": 0.8032836355888552, + "eval_loss": 0.9329529404640198, + "eval_precision": 0.8002584923316631, + "eval_recall": 0.8497782705099778, + "eval_runtime": 7.8404, + "eval_samples_per_second": 460.182, + "eval_steps_per_second": 3.699, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0007644642583878551, + "loss": 0.7718, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.856430155210643, + "eval_f1": 0.8104305336895903, + "eval_loss": 0.9171992540359497, + "eval_precision": 0.854794001311171, + "eval_recall": 0.856430155210643, + "eval_runtime": 7.9644, + "eval_samples_per_second": 453.015, + "eval_steps_per_second": 3.641, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.000748202168719905, + "loss": 0.7819, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0007299602682247362, + "loss": 0.7846, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8799889135254989, + "eval_f1": 0.8553212546285753, + "eval_loss": 0.8724035024642944, + "eval_precision": 0.8719274879772271, + "eval_recall": 0.8799889135254989, + "eval_runtime": 7.7992, + "eval_samples_per_second": 462.614, + "eval_steps_per_second": 3.718, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0007098448091327991, + "loss": 0.7655, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8686252771618626, + "eval_f1": 0.8740794509646093, + "eval_loss": 0.884833812713623, + "eval_precision": 0.8842000527051329, + "eval_recall": 0.8686252771618626, + "eval_runtime": 8.0473, + "eval_samples_per_second": 448.351, + "eval_steps_per_second": 3.604, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0006879729564508654, + "loss": 0.7777, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0006644721055197961, + "loss": 0.7604, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8966186252771619, + "eval_f1": 0.8810507212841943, + "eval_loss": 0.8381132483482361, + "eval_precision": 0.8844978196311211, + "eval_recall": 0.8966186252771619, + "eval_runtime": 7.91, + "eval_samples_per_second": 456.134, + "eval_steps_per_second": 3.666, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0006394791399844488, + "loss": 0.7558, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0006131396344977714, + "loss": 0.752, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8971729490022173, + "eval_f1": 0.8848298098839147, + "eval_loss": 0.8453401327133179, + "eval_precision": 0.8856426040157203, + "eval_recall": 0.8971729490022173, + "eval_runtime": 7.504, + "eval_samples_per_second": 480.811, + "eval_steps_per_second": 3.865, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0005856070068030333, + "loss": 0.7536, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8946784922394678, + "eval_f1": 0.8815589491266002, + "eval_loss": 0.8512121438980103, + "eval_precision": 0.8855466342078576, + "eval_recall": 0.8946784922394678, + "eval_runtime": 7.8407, + "eval_samples_per_second": 460.164, + "eval_steps_per_second": 3.699, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0005570416241329965, + "loss": 0.7543, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0005276098691309143, + "loss": 0.7426, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.897450110864745, + "eval_f1": 0.8909037526883057, + "eval_loss": 0.8405832052230835, + "eval_precision": 0.8966029506077826, + "eval_recall": 0.897450110864745, + "eval_runtime": 7.9226, + "eval_samples_per_second": 455.407, + "eval_steps_per_second": 3.66, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0004974831707340218, + "loss": 0.7284, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.0004668370056642552, + "loss": 0.7403, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8921840354767184, + "eval_f1": 0.8877390070946076, + "eval_loss": 0.8361708521842957, + "eval_precision": 0.8844153960332184, + "eval_recall": 0.8921840354767184, + "eval_runtime": 7.9079, + "eval_samples_per_second": 456.25, + "eval_steps_per_second": 3.667, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00043584987634214163, + "loss": 0.7306, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8472838137472284, + "eval_f1": 0.8574331425199453, + "eval_loss": 0.9056646823883057, + "eval_precision": 0.8789827223692359, + "eval_recall": 0.8472838137472284, + "eval_runtime": 8.1126, + "eval_samples_per_second": 444.74, + "eval_steps_per_second": 3.575, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0004047022711771317, + "loss": 0.7284, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.0003735756132902889, + "loss": 0.7223, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.905210643015521, + "eval_f1": 0.8987132818763163, + "eval_loss": 0.8273950815200806, + "eval_precision": 0.8953136417837683, + "eval_recall": 0.905210643015521, + "eval_runtime": 8.008, + "eval_samples_per_second": 450.547, + "eval_steps_per_second": 3.621, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00034265120379263595, + "loss": 0.7194, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00031210916577416293, + "loss": 0.7141, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9054878048780488, + "eval_f1": 0.8992008609985861, + "eval_loss": 0.8288394212722778, + "eval_precision": 0.8953465845974982, + "eval_recall": 0.9054878048780488, + "eval_runtime": 7.8431, + "eval_samples_per_second": 460.023, + "eval_steps_per_second": 3.698, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.0002821273951543668, + "loss": 0.7102, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8980044345898004, + "eval_f1": 0.8907601682963122, + "eval_loss": 0.8337363600730896, + "eval_precision": 0.8862875595074383, + "eval_recall": 0.8980044345898004, + "eval_runtime": 8.018, + "eval_samples_per_second": 449.985, + "eval_steps_per_second": 3.617, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.0002528805245052235, + "loss": 0.7137, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.00022453890588193626, + "loss": 0.7041, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.904579858101332, + "eval_loss": 0.8208028078079224, + "eval_precision": 0.9024604469931671, + "eval_recall": 0.907150776053215, + "eval_runtime": 7.6846, + "eval_samples_per_second": 469.511, + "eval_steps_per_second": 3.774, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.00019726761858608805, + "loss": 0.7037, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9049334811529933, + "eval_f1": 0.9002840097359408, + "eval_loss": 0.8269202709197998, + "eval_precision": 0.8974380444011593, + "eval_recall": 0.9049334811529933, + "eval_runtime": 8.0701, + "eval_samples_per_second": 447.083, + "eval_steps_per_second": 3.594, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00017122550764060861, + "loss": 0.7045, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 0.00014656425857707532, + "loss": 0.7038, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9085398263211256, + "eval_loss": 0.8176431655883789, + "eval_precision": 0.9086579230010748, + "eval_recall": 0.9129711751662971, + "eval_runtime": 7.6707, + "eval_samples_per_second": 470.364, + "eval_steps_per_second": 3.781, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 0.00012342751392436718, + "loss": 0.699, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 0.0001019500365447946, + "loss": 0.6975, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.9040742462795275, + "eval_loss": 0.8225728273391724, + "eval_precision": 0.9023252523718525, + "eval_recall": 0.9065964523281597, + "eval_runtime": 7.938, + "eval_samples_per_second": 454.521, + "eval_steps_per_second": 3.653, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 8.225692469095638e-05, + "loss": 0.6985, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.9072800749263832, + "eval_loss": 0.8168608546257019, + "eval_precision": 0.9050856666072918, + "eval_recall": 0.9113082039911308, + "eval_runtime": 7.9808, + "eval_samples_per_second": 452.082, + "eval_steps_per_second": 3.634, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 6.44628833553244e-05, + "loss": 0.6929, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 4.8671556156672855e-05, + "loss": 0.6965, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.906914966022514, + "eval_loss": 0.81673264503479, + "eval_precision": 0.9042998263150301, + "eval_recall": 0.9115853658536586, + "eval_runtime": 7.939, + "eval_samples_per_second": 454.464, + "eval_steps_per_second": 3.653, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 3.497492165486024e-05, + "loss": 0.6931, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 2.3452757610206105e-05, + "loss": 0.6931, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9084932873089966, + "eval_loss": 0.8139696717262268, + "eval_precision": 0.9064485274126076, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.2019, + "eval_samples_per_second": 439.9, + "eval_steps_per_second": 3.536, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.4172176307950878e-05, + "loss": 0.6934, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9104148080911771, + "eval_loss": 0.8104686737060547, + "eval_precision": 0.9078149684288562, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.0885, + "eval_samples_per_second": 446.063, + "eval_steps_per_second": 3.585, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 7.187233654358336e-06, + "loss": 0.6936, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 2.538614321330241e-06, + "loss": 0.6948, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9139898150700022, + "eval_loss": 0.8071839809417725, + "eval_precision": 0.9111549356504294, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.6843, + "eval_samples_per_second": 469.532, + "eval_steps_per_second": 3.774, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.405218143894378, + "learning_rate": 0.0008165972038607772, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-vrdj6xlx/checkpoint-616/training_args.bin b/run-vrdj6xlx/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fec4005539dd4e53915d9108a3895903456c8636 --- /dev/null +++ b/run-vrdj6xlx/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0019c6fbaeee25232b03494f3c382fe9c3bca09be25c8a2887e71d2788f015cc +size 4792 diff --git a/run-vrdj6xlx/checkpoint-630/model.safetensors b/run-vrdj6xlx/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b396e6801efa77f5cdf0c2cdcd59d19348a1ee6 --- /dev/null +++ b/run-vrdj6xlx/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:046361c54b7289165a1fdb32c535de56a39ac48711de0343615c7d843f1131fc +size 198025308 diff --git a/run-vrdj6xlx/checkpoint-630/optimizer.pt b/run-vrdj6xlx/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..34f911487ea45be023dc11d4c2a2190f0cd1bd86 --- /dev/null +++ b/run-vrdj6xlx/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03a626aa0d8d4ff7ea64d4965ba3b0f16bc5d0da0b4174b4d077f0bc9ee3533a +size 395900602 diff --git a/run-vrdj6xlx/checkpoint-630/rng_state.pth b/run-vrdj6xlx/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-vrdj6xlx/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-vrdj6xlx/checkpoint-630/scheduler.pt b/run-vrdj6xlx/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a9eb8f38c5aae38b500e4fbb044e8df9f63f758d --- /dev/null +++ b/run-vrdj6xlx/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:387127fd0f048d30e3fce7e56e51387889400da28fb9863a7a0328a75ede596d +size 1064 diff --git a/run-vrdj6xlx/checkpoint-630/trainer_state.json b/run-vrdj6xlx/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c08d89c4c167730b921de3eebf18a21b1cf98cf3 --- /dev/null +++ b/run-vrdj6xlx/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9139898150700022, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-vrdj6xlx/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 0.00011174488052831688, + "loss": 1.2677, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8292682926829268, + "eval_f1": 0.7526922431410414, + "eval_loss": 1.0391713380813599, + "eval_precision": 0.7703043936882441, + "eval_recall": 0.8292682926829268, + "eval_runtime": 7.9747, + "eval_samples_per_second": 452.43, + "eval_steps_per_second": 3.636, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 0.00022348976105663376, + "loss": 0.9196, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 0.00033523464158495065, + "loss": 0.8196, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.9013303769401331, + "eval_f1": 0.8899439663307231, + "eval_loss": 0.8275727033615112, + "eval_precision": 0.8917034066849797, + "eval_recall": 0.9013303769401331, + "eval_runtime": 8.0612, + "eval_samples_per_second": 447.574, + "eval_steps_per_second": 3.597, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.0004469795221132675, + "loss": 0.8006, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.8076496674057649, + "eval_f1": 0.8286124493691536, + "eval_loss": 0.9636921882629395, + "eval_precision": 0.8751698676836334, + "eval_recall": 0.8076496674057649, + "eval_runtime": 7.9458, + "eval_samples_per_second": 454.078, + "eval_steps_per_second": 3.65, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.0005587244026415844, + "loss": 0.7921, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.0006704692831699013, + "loss": 0.7854, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6549334811529933, + "eval_f1": 0.7105334225104484, + "eval_loss": 1.1556798219680786, + "eval_precision": 0.8589335112346032, + "eval_recall": 0.6549334811529933, + "eval_runtime": 8.0017, + "eval_samples_per_second": 450.906, + "eval_steps_per_second": 3.624, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.0007822141636982181, + "loss": 0.8045, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0008160271393413373, + "loss": 0.7862, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8738913525498891, + "eval_f1": 0.8666497036916437, + "eval_loss": 0.8896118998527527, + "eval_precision": 0.8756908718454293, + "eval_recall": 0.8738913525498891, + "eval_runtime": 8.026, + "eval_samples_per_second": 449.541, + "eval_steps_per_second": 3.613, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0008131948353441326, + "loss": 0.8024, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8458980044345898, + "eval_f1": 0.808545140802328, + "eval_loss": 0.9162513613700867, + "eval_precision": 0.8260174016257357, + "eval_recall": 0.8458980044345898, + "eval_runtime": 7.9211, + "eval_samples_per_second": 455.494, + "eval_steps_per_second": 3.661, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0008080041626060989, + "loss": 0.8018, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.000800485354849655, + "loss": 0.796, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.8384146341463414, + "eval_f1": 0.8293850340041232, + "eval_loss": 0.952700138092041, + "eval_precision": 0.8629293120237446, + "eval_recall": 0.8384146341463414, + "eval_runtime": 8.0794, + "eval_samples_per_second": 446.57, + "eval_steps_per_second": 3.589, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0007906822063107499, + "loss": 0.7988, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0007786518166538603, + "loss": 0.7819, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8497782705099778, + "eval_f1": 0.8032836355888552, + "eval_loss": 0.9329529404640198, + "eval_precision": 0.8002584923316631, + "eval_recall": 0.8497782705099778, + "eval_runtime": 7.8404, + "eval_samples_per_second": 460.182, + "eval_steps_per_second": 3.699, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0007644642583878551, + "loss": 0.7718, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.856430155210643, + "eval_f1": 0.8104305336895903, + "eval_loss": 0.9171992540359497, + "eval_precision": 0.854794001311171, + "eval_recall": 0.856430155210643, + "eval_runtime": 7.9644, + "eval_samples_per_second": 453.015, + "eval_steps_per_second": 3.641, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.000748202168719905, + "loss": 0.7819, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.0007299602682247362, + "loss": 0.7846, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8799889135254989, + "eval_f1": 0.8553212546285753, + "eval_loss": 0.8724035024642944, + "eval_precision": 0.8719274879772271, + "eval_recall": 0.8799889135254989, + "eval_runtime": 7.7992, + "eval_samples_per_second": 462.614, + "eval_steps_per_second": 3.718, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0007098448091327991, + "loss": 0.7655, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8686252771618626, + "eval_f1": 0.8740794509646093, + "eval_loss": 0.884833812713623, + "eval_precision": 0.8842000527051329, + "eval_recall": 0.8686252771618626, + "eval_runtime": 8.0473, + "eval_samples_per_second": 448.351, + "eval_steps_per_second": 3.604, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0006879729564508654, + "loss": 0.7777, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0006644721055197961, + "loss": 0.7604, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8966186252771619, + "eval_f1": 0.8810507212841943, + "eval_loss": 0.8381132483482361, + "eval_precision": 0.8844978196311211, + "eval_recall": 0.8966186252771619, + "eval_runtime": 7.91, + "eval_samples_per_second": 456.134, + "eval_steps_per_second": 3.666, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0006394791399844488, + "loss": 0.7558, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0006131396344977714, + "loss": 0.752, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8971729490022173, + "eval_f1": 0.8848298098839147, + "eval_loss": 0.8453401327133179, + "eval_precision": 0.8856426040157203, + "eval_recall": 0.8971729490022173, + "eval_runtime": 7.504, + "eval_samples_per_second": 480.811, + "eval_steps_per_second": 3.865, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0005856070068030333, + "loss": 0.7536, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.8946784922394678, + "eval_f1": 0.8815589491266002, + "eval_loss": 0.8512121438980103, + "eval_precision": 0.8855466342078576, + "eval_recall": 0.8946784922394678, + "eval_runtime": 7.8407, + "eval_samples_per_second": 460.164, + "eval_steps_per_second": 3.699, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0005570416241329965, + "loss": 0.7543, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0005276098691309143, + "loss": 0.7426, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.897450110864745, + "eval_f1": 0.8909037526883057, + "eval_loss": 0.8405832052230835, + "eval_precision": 0.8966029506077826, + "eval_recall": 0.897450110864745, + "eval_runtime": 7.9226, + "eval_samples_per_second": 455.407, + "eval_steps_per_second": 3.66, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0004974831707340218, + "loss": 0.7284, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.0004668370056642552, + "loss": 0.7403, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8921840354767184, + "eval_f1": 0.8877390070946076, + "eval_loss": 0.8361708521842957, + "eval_precision": 0.8844153960332184, + "eval_recall": 0.8921840354767184, + "eval_runtime": 7.9079, + "eval_samples_per_second": 456.25, + "eval_steps_per_second": 3.667, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00043584987634214163, + "loss": 0.7306, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8472838137472284, + "eval_f1": 0.8574331425199453, + "eval_loss": 0.9056646823883057, + "eval_precision": 0.8789827223692359, + "eval_recall": 0.8472838137472284, + "eval_runtime": 8.1126, + "eval_samples_per_second": 444.74, + "eval_steps_per_second": 3.575, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0004047022711771317, + "loss": 0.7284, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.0003735756132902889, + "loss": 0.7223, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.905210643015521, + "eval_f1": 0.8987132818763163, + "eval_loss": 0.8273950815200806, + "eval_precision": 0.8953136417837683, + "eval_recall": 0.905210643015521, + "eval_runtime": 8.008, + "eval_samples_per_second": 450.547, + "eval_steps_per_second": 3.621, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00034265120379263595, + "loss": 0.7194, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 0.00031210916577416293, + "loss": 0.7141, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9054878048780488, + "eval_f1": 0.8992008609985861, + "eval_loss": 0.8288394212722778, + "eval_precision": 0.8953465845974982, + "eval_recall": 0.9054878048780488, + "eval_runtime": 7.8431, + "eval_samples_per_second": 460.023, + "eval_steps_per_second": 3.698, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 0.0002821273951543668, + "loss": 0.7102, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8980044345898004, + "eval_f1": 0.8907601682963122, + "eval_loss": 0.8337363600730896, + "eval_precision": 0.8862875595074383, + "eval_recall": 0.8980044345898004, + "eval_runtime": 8.018, + "eval_samples_per_second": 449.985, + "eval_steps_per_second": 3.617, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 0.0002528805245052235, + "loss": 0.7137, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 0.00022453890588193626, + "loss": 0.7041, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.904579858101332, + "eval_loss": 0.8208028078079224, + "eval_precision": 0.9024604469931671, + "eval_recall": 0.907150776053215, + "eval_runtime": 7.6846, + "eval_samples_per_second": 469.511, + "eval_steps_per_second": 3.774, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 0.00019726761858608805, + "loss": 0.7037, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9049334811529933, + "eval_f1": 0.9002840097359408, + "eval_loss": 0.8269202709197998, + "eval_precision": 0.8974380444011593, + "eval_recall": 0.9049334811529933, + "eval_runtime": 8.0701, + "eval_samples_per_second": 447.083, + "eval_steps_per_second": 3.594, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 0.00017122550764060861, + "loss": 0.7045, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 0.00014656425857707532, + "loss": 0.7038, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9085398263211256, + "eval_loss": 0.8176431655883789, + "eval_precision": 0.9086579230010748, + "eval_recall": 0.9129711751662971, + "eval_runtime": 7.6707, + "eval_samples_per_second": 470.364, + "eval_steps_per_second": 3.781, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 0.00012342751392436718, + "loss": 0.699, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 0.0001019500365447946, + "loss": 0.6975, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.9040742462795275, + "eval_loss": 0.8225728273391724, + "eval_precision": 0.9023252523718525, + "eval_recall": 0.9065964523281597, + "eval_runtime": 7.938, + "eval_samples_per_second": 454.521, + "eval_steps_per_second": 3.653, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 8.225692469095638e-05, + "loss": 0.6985, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.9072800749263832, + "eval_loss": 0.8168608546257019, + "eval_precision": 0.9050856666072918, + "eval_recall": 0.9113082039911308, + "eval_runtime": 7.9808, + "eval_samples_per_second": 452.082, + "eval_steps_per_second": 3.634, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 6.44628833553244e-05, + "loss": 0.6929, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 4.8671556156672855e-05, + "loss": 0.6965, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.906914966022514, + "eval_loss": 0.81673264503479, + "eval_precision": 0.9042998263150301, + "eval_recall": 0.9115853658536586, + "eval_runtime": 7.939, + "eval_samples_per_second": 454.464, + "eval_steps_per_second": 3.653, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 3.497492165486024e-05, + "loss": 0.6931, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 2.3452757610206105e-05, + "loss": 0.6931, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9084932873089966, + "eval_loss": 0.8139696717262268, + "eval_precision": 0.9064485274126076, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.2019, + "eval_samples_per_second": 439.9, + "eval_steps_per_second": 3.536, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 1.4172176307950878e-05, + "loss": 0.6934, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9104148080911771, + "eval_loss": 0.8104686737060547, + "eval_precision": 0.9078149684288562, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.0885, + "eval_samples_per_second": 446.063, + "eval_steps_per_second": 3.585, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 7.187233654358336e-06, + "loss": 0.6936, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 2.538614321330241e-06, + "loss": 0.6948, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9139898150700022, + "eval_loss": 0.8071839809417725, + "eval_precision": 0.9111549356504294, + "eval_recall": 0.9182372505543237, + "eval_runtime": 7.6843, + "eval_samples_per_second": 469.532, + "eval_steps_per_second": 3.774, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 2.5339477344769754e-07, + "loss": 0.6888, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9103475532629652, + "eval_loss": 0.8139931559562683, + "eval_precision": 0.9083164277864412, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.8889, + "eval_samples_per_second": 457.352, + "eval_steps_per_second": 3.676, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.405218143894378, + "learning_rate": 0.0008165972038607772, + "metric": "eval/loss", + "use_mish": true + } +} diff --git a/run-vrdj6xlx/checkpoint-630/training_args.bin b/run-vrdj6xlx/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fec4005539dd4e53915d9108a3895903456c8636 --- /dev/null +++ b/run-vrdj6xlx/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0019c6fbaeee25232b03494f3c382fe9c3bca09be25c8a2887e71d2788f015cc +size 4792 diff --git a/run-vyijdlbe/checkpoint-616/model.safetensors b/run-vyijdlbe/checkpoint-616/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67c76e783aef5f56325eeccf978845ce5049b90b --- /dev/null +++ b/run-vyijdlbe/checkpoint-616/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f464518269a554b4b2056fece5795e279aaa89ef294dd115f47554c89a6283b +size 198025308 diff --git a/run-vyijdlbe/checkpoint-616/optimizer.pt b/run-vyijdlbe/checkpoint-616/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..77433f0cffc7511d9fdf8b4dfe480369f63fd584 --- /dev/null +++ b/run-vyijdlbe/checkpoint-616/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a00dde31372119d9bbf8cb4eec4e9c9e2c02705133ee78583b479c22da0d289 +size 395900602 diff --git a/run-vyijdlbe/checkpoint-616/rng_state.pth b/run-vyijdlbe/checkpoint-616/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cc9452644ccf50fb0c46ce6ff3a3b902206ef1e --- /dev/null +++ b/run-vyijdlbe/checkpoint-616/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2b8d6226f0184e5c70dee9da0c1834479caa80b2d9d884900c60c869ae9ea +size 14244 diff --git a/run-vyijdlbe/checkpoint-616/scheduler.pt b/run-vyijdlbe/checkpoint-616/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..13b432a069c1a3fc60027019b775036a106986dc --- /dev/null +++ b/run-vyijdlbe/checkpoint-616/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f2a39addc63bf08c05fee6025526950d94468fb1eb627e91be3e960dba73288 +size 1064 diff --git a/run-vyijdlbe/checkpoint-616/trainer_state.json b/run-vyijdlbe/checkpoint-616/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..37a817edf405dfd721be8f3df227d002d2c41ebb --- /dev/null +++ b/run-vyijdlbe/checkpoint-616/trainer_state.json @@ -0,0 +1,658 @@ +{ + "best_metric": 0.9184336979587917, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-vyijdlbe/checkpoint-616", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 616, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.9402818520728292e-05, + "loss": 1.4351, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9814352989196777, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.287, + "eval_samples_per_second": 435.382, + "eval_steps_per_second": 3.499, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 5.8805637041456584e-05, + "loss": 1.0555, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 8.820845556218487e-05, + "loss": 0.9108, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8902439024390244, + "eval_f1": 0.8717995771830329, + "eval_loss": 0.9692619442939758, + "eval_precision": 0.8836952676710372, + "eval_recall": 0.8902439024390244, + "eval_runtime": 7.3235, + "eval_samples_per_second": 492.661, + "eval_steps_per_second": 3.96, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00011761127408291317, + "loss": 0.8394, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9063867054601452, + "eval_loss": 0.8132822513580322, + "eval_precision": 0.902399962878747, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.908, + "eval_samples_per_second": 456.25, + "eval_steps_per_second": 3.667, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.00014701409260364144, + "loss": 0.8091, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00017641691112436973, + "loss": 0.7861, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9083208354831361, + "eval_loss": 0.8039150238037109, + "eval_precision": 0.9065709403946882, + "eval_recall": 0.9115853658536586, + "eval_runtime": 8.2049, + "eval_samples_per_second": 439.735, + "eval_steps_per_second": 3.534, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00020581972964509802, + "loss": 0.7922, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0002147167527729585, + "loss": 0.7702, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8700110864745011, + "eval_f1": 0.8767130224609729, + "eval_loss": 0.8767539262771606, + "eval_precision": 0.8992340430258651, + "eval_recall": 0.8700110864745011, + "eval_runtime": 7.3334, + "eval_samples_per_second": 491.995, + "eval_steps_per_second": 3.955, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00021397150413130604, + "loss": 0.7738, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8816518847006651, + "eval_f1": 0.8861926228851484, + "eval_loss": 0.8532981276512146, + "eval_precision": 0.9042846653061574, + "eval_recall": 0.8816518847006651, + "eval_runtime": 7.9192, + "eval_samples_per_second": 455.601, + "eval_steps_per_second": 3.662, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00021260571083683632, + "loss": 0.7665, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0002106273281233755, + "loss": 0.7579, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.8984252403943909, + "eval_loss": 0.818888247013092, + "eval_precision": 0.9002893403278945, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.5568, + "eval_samples_per_second": 477.453, + "eval_steps_per_second": 3.838, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00020804787932842038, + "loss": 0.7596, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00020488238877401836, + "loss": 0.7484, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.7214523281596452, + "eval_f1": 0.7735831025485165, + "eval_loss": 1.0999703407287598, + "eval_precision": 0.8987366396596387, + "eval_recall": 0.7214523281596452, + "eval_runtime": 7.5212, + "eval_samples_per_second": 479.708, + "eval_steps_per_second": 3.856, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00020114929425572498, + "loss": 0.7356, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9035476718403548, + "eval_f1": 0.8915963751603405, + "eval_loss": 0.8293518424034119, + "eval_precision": 0.8966112543211039, + "eval_recall": 0.9035476718403548, + "eval_runtime": 7.5748, + "eval_samples_per_second": 476.314, + "eval_steps_per_second": 3.828, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0001968703396493582, + "loss": 0.7419, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00019207044826107474, + "loss": 0.7358, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9039059835466449, + "eval_loss": 0.8097395300865173, + "eval_precision": 0.9041875622601486, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.6979, + "eval_samples_per_second": 468.702, + "eval_steps_per_second": 3.767, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.000186777577658457, + "loss": 0.7273, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9007760532150776, + "eval_f1": 0.9023545748050417, + "eval_loss": 0.8265562653541565, + "eval_precision": 0.9052645387409299, + "eval_recall": 0.9007760532150776, + "eval_runtime": 7.639, + "eval_samples_per_second": 472.315, + "eval_steps_per_second": 3.796, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00018102255682816455, + "loss": 0.7336, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00017483890660864674, + "loss": 0.7209, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9089133827097554, + "eval_loss": 0.8167888522148132, + "eval_precision": 0.9086968509698742, + "eval_recall": 0.9110310421286031, + "eval_runtime": 7.5592, + "eval_samples_per_second": 477.301, + "eval_steps_per_second": 3.836, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00016826264444382735, + "loss": 0.7163, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0001613320745949988, + "loss": 0.7129, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9018847006651884, + "eval_f1": 0.9012244712097227, + "eval_loss": 0.8291696906089783, + "eval_precision": 0.9018538659238045, + "eval_recall": 0.9018847006651884, + "eval_runtime": 7.9838, + "eval_samples_per_second": 451.915, + "eval_steps_per_second": 3.632, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00015408756503286258, + "loss": 0.7124, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9039748757937793, + "eval_loss": 0.8155338764190674, + "eval_precision": 0.9023538309292725, + "eval_recall": 0.9110310421286031, + "eval_runtime": 8.2333, + "eval_samples_per_second": 438.22, + "eval_steps_per_second": 3.522, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00014657131230923635, + "loss": 0.7109, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0001388270957779612, + "loss": 0.7101, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9086025176958367, + "eval_loss": 0.8127209544181824, + "eval_precision": 0.9081311101260359, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.6387, + "eval_samples_per_second": 472.334, + "eval_steps_per_second": 3.796, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00013090002259658106, + "loss": 0.7084, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.0001228362649940635, + "loss": 0.7085, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9055624715583143, + "eval_loss": 0.8053682446479797, + "eval_precision": 0.9016359386933901, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.8765, + "eval_samples_per_second": 458.071, + "eval_steps_per_second": 3.682, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00011468279133487818, + "loss": 0.7021, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.910554431711864, + "eval_loss": 0.8119134902954102, + "eval_precision": 0.9117329090775393, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.8187, + "eval_samples_per_second": 461.46, + "eval_steps_per_second": 3.709, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00010648709254588525, + "loss": 0.7043, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 9.829690549949332e-05, + "loss": 0.6992, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9105133154559656, + "eval_loss": 0.8117478489875793, + "eval_precision": 0.9070457755175231, + "eval_recall": 0.9151884700665188, + "eval_runtime": 7.8476, + "eval_samples_per_second": 459.761, + "eval_steps_per_second": 3.695, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 9.015993496427706e-05, + "loss": 0.6992, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 8.212357574258742e-05, + "loss": 0.6984, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.903928934565224, + "eval_loss": 0.8191829919815063, + "eval_precision": 0.9004779655423287, + "eval_recall": 0.9093680709534369, + "eval_runtime": 8.4793, + "eval_samples_per_second": 425.508, + "eval_steps_per_second": 3.42, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 7.423463661359907e-05, + "loss": 0.6961, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.9109818061777307, + "eval_loss": 0.820063054561615, + "eval_precision": 0.9115403970321969, + "eval_recall": 0.9113082039911308, + "eval_runtime": 8.1932, + "eval_samples_per_second": 440.367, + "eval_steps_per_second": 3.54, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 6.653906768972285e-05, + "loss": 0.698, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 5.9081692774430526e-05, + "loss": 0.6955, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9122964370853305, + "eval_loss": 0.8088396787643433, + "eval_precision": 0.9115756601255808, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.2701, + "eval_samples_per_second": 436.27, + "eval_steps_per_second": 3.507, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 5.190594828040628e-05, + "loss": 0.693, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9078703226608745, + "eval_loss": 0.8125221133232117, + "eval_precision": 0.905052012058138, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.8946, + "eval_samples_per_second": 457.02, + "eval_steps_per_second": 3.673, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 4.505363022872992e-05, + "loss": 0.6942, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 3.856465080272568e-05, + "loss": 0.6915, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9218403547671841, + "eval_f1": 0.9176933872210846, + "eval_loss": 0.7996315956115723, + "eval_precision": 0.9160476159258204, + "eval_recall": 0.9218403547671841, + "eval_runtime": 7.9712, + "eval_samples_per_second": 452.631, + "eval_steps_per_second": 3.638, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 3.2476805874459645e-05, + "loss": 0.6902, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 2.6825554857956976e-05, + "loss": 0.6883, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9109062276751227, + "eval_loss": 0.807771623134613, + "eval_precision": 0.9096562796604263, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.7316, + "eval_samples_per_second": 466.659, + "eval_steps_per_second": 3.751, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 2.1643814171410913e-05, + "loss": 0.6901, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9103436538369089, + "eval_loss": 0.8104673624038696, + "eval_precision": 0.9085260044764135, + "eval_recall": 0.9138026607538803, + "eval_runtime": 8.0452, + "eval_samples_per_second": 448.469, + "eval_steps_per_second": 3.605, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.6961765511388913e-05, + "loss": 0.6868, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.2806680055767229e-05, + "loss": 0.6895, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9141992990978022, + "eval_loss": 0.807719349861145, + "eval_precision": 0.9121909061620478, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.0834, + "eval_samples_per_second": 446.347, + "eval_steps_per_second": 3.588, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 9.202759619345176e-06, + "loss": 0.6876, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 6.1709956873485236e-06, + "loss": 0.687, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9090535082023486, + "eval_loss": 0.8098934888839722, + "eval_precision": 0.9064474315672072, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.9243, + "eval_samples_per_second": 455.309, + "eval_steps_per_second": 3.66, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 3.7290471478990914e-06, + "loss": 0.6887, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9132500463564304, + "eval_loss": 0.8081458210945129, + "eval_precision": 0.909960787008455, + "eval_recall": 0.9174057649667405, + "eval_runtime": 7.3818, + "eval_samples_per_second": 488.773, + "eval_steps_per_second": 3.929, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.8911374356127023e-06, + "loss": 0.6893, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 6.679716854257141e-07, + "loss": 0.6896, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9184336979587917, + "eval_loss": 0.7984321117401123, + "eval_precision": 0.916259009059707, + "eval_recall": 0.9223946784922394, + "eval_runtime": 7.5473, + "eval_samples_per_second": 478.051, + "eval_steps_per_second": 3.842, + "step": 616 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4684093006515007, + "learning_rate": 0.00021486675072839904, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-vyijdlbe/checkpoint-616/training_args.bin b/run-vyijdlbe/checkpoint-616/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bbab51b8224356cedb24f163581be2a34c75393f --- /dev/null +++ b/run-vyijdlbe/checkpoint-616/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0ebc97b6755d42b376a4f207607fbef7b569ad391a56abc6db5fab7df38fbef +size 4792 diff --git a/run-vyijdlbe/checkpoint-630/model.safetensors b/run-vyijdlbe/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7da38009c3edb66df4af1e092651c8a8ca155f0 --- /dev/null +++ b/run-vyijdlbe/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0389d90061389feadd4d2590d99ddca58c01ab9561f697b2c53f1454ab220516 +size 198025308 diff --git a/run-vyijdlbe/checkpoint-630/optimizer.pt b/run-vyijdlbe/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f16ab0cb07f3d275462babe96afa18a078dda1ac --- /dev/null +++ b/run-vyijdlbe/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3413465caf0e50ee80ec06dd1a73aa974e72e92e814f5b1988646392938b506 +size 395900602 diff --git a/run-vyijdlbe/checkpoint-630/rng_state.pth b/run-vyijdlbe/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-vyijdlbe/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-vyijdlbe/checkpoint-630/scheduler.pt b/run-vyijdlbe/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7ca865b7b7c46862aeaf72bd363dde90d7445f45 --- /dev/null +++ b/run-vyijdlbe/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6d7eec08d0ec6727ff09330b1cf38c49e4b9816dd105ba276758b86785a0f70 +size 1064 diff --git a/run-vyijdlbe/checkpoint-630/trainer_state.json b/run-vyijdlbe/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..752663078a908187f6f787a95db758714de51ee5 --- /dev/null +++ b/run-vyijdlbe/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9184336979587917, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-vyijdlbe/checkpoint-616", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.9402818520728292e-05, + "loss": 1.4351, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9814352989196777, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.287, + "eval_samples_per_second": 435.382, + "eval_steps_per_second": 3.499, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 5.8805637041456584e-05, + "loss": 1.0555, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 8.820845556218487e-05, + "loss": 0.9108, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8902439024390244, + "eval_f1": 0.8717995771830329, + "eval_loss": 0.9692619442939758, + "eval_precision": 0.8836952676710372, + "eval_recall": 0.8902439024390244, + "eval_runtime": 7.3235, + "eval_samples_per_second": 492.661, + "eval_steps_per_second": 3.96, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 0.00011761127408291317, + "loss": 0.8394, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9063867054601452, + "eval_loss": 0.8132822513580322, + "eval_precision": 0.902399962878747, + "eval_recall": 0.9135254988913526, + "eval_runtime": 7.908, + "eval_samples_per_second": 456.25, + "eval_steps_per_second": 3.667, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.00014701409260364144, + "loss": 0.8091, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00017641691112436973, + "loss": 0.7861, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9115853658536586, + "eval_f1": 0.9083208354831361, + "eval_loss": 0.8039150238037109, + "eval_precision": 0.9065709403946882, + "eval_recall": 0.9115853658536586, + "eval_runtime": 8.2049, + "eval_samples_per_second": 439.735, + "eval_steps_per_second": 3.534, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00020581972964509802, + "loss": 0.7922, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.0002147167527729585, + "loss": 0.7702, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8700110864745011, + "eval_f1": 0.8767130224609729, + "eval_loss": 0.8767539262771606, + "eval_precision": 0.8992340430258651, + "eval_recall": 0.8700110864745011, + "eval_runtime": 7.3334, + "eval_samples_per_second": 491.995, + "eval_steps_per_second": 3.955, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.00021397150413130604, + "loss": 0.7738, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8816518847006651, + "eval_f1": 0.8861926228851484, + "eval_loss": 0.8532981276512146, + "eval_precision": 0.9042846653061574, + "eval_recall": 0.8816518847006651, + "eval_runtime": 7.9192, + "eval_samples_per_second": 455.601, + "eval_steps_per_second": 3.662, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00021260571083683632, + "loss": 0.7665, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0002106273281233755, + "loss": 0.7579, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.8984252403943909, + "eval_loss": 0.818888247013092, + "eval_precision": 0.9002893403278945, + "eval_recall": 0.9104767184035477, + "eval_runtime": 7.5568, + "eval_samples_per_second": 477.453, + "eval_steps_per_second": 3.838, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00020804787932842038, + "loss": 0.7596, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00020488238877401836, + "loss": 0.7484, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.7214523281596452, + "eval_f1": 0.7735831025485165, + "eval_loss": 1.0999703407287598, + "eval_precision": 0.8987366396596387, + "eval_recall": 0.7214523281596452, + "eval_runtime": 7.5212, + "eval_samples_per_second": 479.708, + "eval_steps_per_second": 3.856, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00020114929425572498, + "loss": 0.7356, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9035476718403548, + "eval_f1": 0.8915963751603405, + "eval_loss": 0.8293518424034119, + "eval_precision": 0.8966112543211039, + "eval_recall": 0.9035476718403548, + "eval_runtime": 7.5748, + "eval_samples_per_second": 476.314, + "eval_steps_per_second": 3.828, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.0001968703396493582, + "loss": 0.7419, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00019207044826107474, + "loss": 0.7358, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9039059835466449, + "eval_loss": 0.8097395300865173, + "eval_precision": 0.9041875622601486, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.6979, + "eval_samples_per_second": 468.702, + "eval_steps_per_second": 3.767, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.000186777577658457, + "loss": 0.7273, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9007760532150776, + "eval_f1": 0.9023545748050417, + "eval_loss": 0.8265562653541565, + "eval_precision": 0.9052645387409299, + "eval_recall": 0.9007760532150776, + "eval_runtime": 7.639, + "eval_samples_per_second": 472.315, + "eval_steps_per_second": 3.796, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00018102255682816455, + "loss": 0.7336, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00017483890660864674, + "loss": 0.7209, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9089133827097554, + "eval_loss": 0.8167888522148132, + "eval_precision": 0.9086968509698742, + "eval_recall": 0.9110310421286031, + "eval_runtime": 7.5592, + "eval_samples_per_second": 477.301, + "eval_steps_per_second": 3.836, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00016826264444382735, + "loss": 0.7163, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.0001613320745949988, + "loss": 0.7129, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9018847006651884, + "eval_f1": 0.9012244712097227, + "eval_loss": 0.8291696906089783, + "eval_precision": 0.9018538659238045, + "eval_recall": 0.9018847006651884, + "eval_runtime": 7.9838, + "eval_samples_per_second": 451.915, + "eval_steps_per_second": 3.632, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00015408756503286258, + "loss": 0.7124, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9110310421286031, + "eval_f1": 0.9039748757937793, + "eval_loss": 0.8155338764190674, + "eval_precision": 0.9023538309292725, + "eval_recall": 0.9110310421286031, + "eval_runtime": 8.2333, + "eval_samples_per_second": 438.22, + "eval_steps_per_second": 3.522, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00014657131230923635, + "loss": 0.7109, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.0001388270957779612, + "loss": 0.7101, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9086025176958367, + "eval_loss": 0.8127209544181824, + "eval_precision": 0.9081311101260359, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.6387, + "eval_samples_per_second": 472.334, + "eval_steps_per_second": 3.796, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00013090002259658106, + "loss": 0.7084, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.0001228362649940635, + "loss": 0.7085, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9055624715583143, + "eval_loss": 0.8053682446479797, + "eval_precision": 0.9016359386933901, + "eval_recall": 0.9126940133037694, + "eval_runtime": 7.8765, + "eval_samples_per_second": 458.071, + "eval_steps_per_second": 3.682, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00011468279133487818, + "loss": 0.7021, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.910554431711864, + "eval_loss": 0.8119134902954102, + "eval_precision": 0.9117329090775393, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.8187, + "eval_samples_per_second": 461.46, + "eval_steps_per_second": 3.709, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.00010648709254588525, + "loss": 0.7043, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 9.829690549949332e-05, + "loss": 0.6992, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9105133154559656, + "eval_loss": 0.8117478489875793, + "eval_precision": 0.9070457755175231, + "eval_recall": 0.9151884700665188, + "eval_runtime": 7.8476, + "eval_samples_per_second": 459.761, + "eval_steps_per_second": 3.695, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 9.015993496427706e-05, + "loss": 0.6992, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 8.212357574258742e-05, + "loss": 0.6984, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.903928934565224, + "eval_loss": 0.8191829919815063, + "eval_precision": 0.9004779655423287, + "eval_recall": 0.9093680709534369, + "eval_runtime": 8.4793, + "eval_samples_per_second": 425.508, + "eval_steps_per_second": 3.42, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 7.423463661359907e-05, + "loss": 0.6961, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.9109818061777307, + "eval_loss": 0.820063054561615, + "eval_precision": 0.9115403970321969, + "eval_recall": 0.9113082039911308, + "eval_runtime": 8.1932, + "eval_samples_per_second": 440.367, + "eval_steps_per_second": 3.54, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 6.653906768972285e-05, + "loss": 0.698, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 5.9081692774430526e-05, + "loss": 0.6955, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9162971175166297, + "eval_f1": 0.9122964370853305, + "eval_loss": 0.8088396787643433, + "eval_precision": 0.9115756601255808, + "eval_recall": 0.9162971175166297, + "eval_runtime": 8.2701, + "eval_samples_per_second": 436.27, + "eval_steps_per_second": 3.507, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 5.190594828040628e-05, + "loss": 0.693, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9078703226608745, + "eval_loss": 0.8125221133232117, + "eval_precision": 0.905052012058138, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.8946, + "eval_samples_per_second": 457.02, + "eval_steps_per_second": 3.673, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 4.505363022872992e-05, + "loss": 0.6942, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 3.856465080272568e-05, + "loss": 0.6915, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9218403547671841, + "eval_f1": 0.9176933872210846, + "eval_loss": 0.7996315956115723, + "eval_precision": 0.9160476159258204, + "eval_recall": 0.9218403547671841, + "eval_runtime": 7.9712, + "eval_samples_per_second": 452.631, + "eval_steps_per_second": 3.638, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 3.2476805874459645e-05, + "loss": 0.6902, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 2.6825554857956976e-05, + "loss": 0.6883, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9109062276751227, + "eval_loss": 0.807771623134613, + "eval_precision": 0.9096562796604263, + "eval_recall": 0.9138026607538803, + "eval_runtime": 7.7316, + "eval_samples_per_second": 466.659, + "eval_steps_per_second": 3.751, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 2.1643814171410913e-05, + "loss": 0.6901, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9138026607538803, + "eval_f1": 0.9103436538369089, + "eval_loss": 0.8104673624038696, + "eval_precision": 0.9085260044764135, + "eval_recall": 0.9138026607538803, + "eval_runtime": 8.0452, + "eval_samples_per_second": 448.469, + "eval_steps_per_second": 3.605, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.6961765511388913e-05, + "loss": 0.6868, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.2806680055767229e-05, + "loss": 0.6895, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9141992990978022, + "eval_loss": 0.807719349861145, + "eval_precision": 0.9121909061620478, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.0834, + "eval_samples_per_second": 446.347, + "eval_steps_per_second": 3.588, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 9.202759619345176e-06, + "loss": 0.6876, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 6.1709956873485236e-06, + "loss": 0.687, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.9090535082023486, + "eval_loss": 0.8098934888839722, + "eval_precision": 0.9064474315672072, + "eval_recall": 0.9121396895787139, + "eval_runtime": 7.9243, + "eval_samples_per_second": 455.309, + "eval_steps_per_second": 3.66, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 3.7290471478990914e-06, + "loss": 0.6887, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9132500463564304, + "eval_loss": 0.8081458210945129, + "eval_precision": 0.909960787008455, + "eval_recall": 0.9174057649667405, + "eval_runtime": 7.3818, + "eval_samples_per_second": 488.773, + "eval_steps_per_second": 3.929, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.8911374356127023e-06, + "loss": 0.6893, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 6.679716854257141e-07, + "loss": 0.6896, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9184336979587917, + "eval_loss": 0.7984321117401123, + "eval_precision": 0.916259009059707, + "eval_recall": 0.9223946784922394, + "eval_runtime": 7.5473, + "eval_samples_per_second": 478.051, + "eval_steps_per_second": 3.842, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 6.667437919803136e-08, + "loss": 0.6836, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9110444175362757, + "eval_loss": 0.8076308369636536, + "eval_precision": 0.9085693367776642, + "eval_recall": 0.9149113082039911, + "eval_runtime": 7.5896, + "eval_samples_per_second": 475.386, + "eval_steps_per_second": 3.821, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "dropout_rate": 0.4684093006515007, + "learning_rate": 0.00021486675072839904, + "metric": "eval/loss", + "use_mish": false + } +} diff --git a/run-vyijdlbe/checkpoint-630/training_args.bin b/run-vyijdlbe/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bbab51b8224356cedb24f163581be2a34c75393f --- /dev/null +++ b/run-vyijdlbe/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0ebc97b6755d42b376a4f207607fbef7b569ad391a56abc6db5fab7df38fbef +size 4792 diff --git a/run-w9lpu36i/checkpoint-1232/model.safetensors b/run-w9lpu36i/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ded44f689f4c0a098dfb95c33828140d0345bf53 --- /dev/null +++ b/run-w9lpu36i/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86c4448de8477e82bc771b111dbd9655051d6b667f8aaee179f6def9bdd65ea6 +size 198025308 diff --git a/run-w9lpu36i/checkpoint-1232/optimizer.pt b/run-w9lpu36i/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..81648f2893446532784768b850c97f98fed1198b --- /dev/null +++ b/run-w9lpu36i/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f85b2ad1bea72c0cd8f1f51b3bd579ce82442c054b747a6c154f5b4cfe3399ee +size 395900602 diff --git a/run-w9lpu36i/checkpoint-1232/rng_state.pth b/run-w9lpu36i/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-w9lpu36i/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-w9lpu36i/checkpoint-1232/scheduler.pt b/run-w9lpu36i/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..861eae4a3c97fb25410573e5ad734255d8746676 --- /dev/null +++ b/run-w9lpu36i/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f7caf4de8099732c348bea7991d317d4ac53650de88471dcdcd8fefcbdbc66c +size 1064 diff --git a/run-w9lpu36i/checkpoint-1232/trainer_state.json b/run-w9lpu36i/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ac94b0f1e772ec769705a5b29f111132270d3548 --- /dev/null +++ b/run-w9lpu36i/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9240576496674058, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-w9lpu36i/checkpoint-1147", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.2307382791337495e-05, + "loss": 1.3646, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8381374722838137, + "eval_loss": 0.9295976161956787, + "eval_runtime": 6.9758, + "eval_samples_per_second": 517.218, + "eval_steps_per_second": 8.171, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 4.461476558267499e-05, + "loss": 0.9523, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 6.692214837401248e-05, + "loss": 0.8534, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.820833146572113, + "eval_runtime": 6.9984, + "eval_samples_per_second": 515.55, + "eval_steps_per_second": 8.145, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 8.922953116534998e-05, + "loss": 0.8145, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8067387342453003, + "eval_runtime": 6.6337, + "eval_samples_per_second": 543.89, + "eval_steps_per_second": 8.593, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00011153691395668747, + "loss": 0.7979, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013384429674802496, + "loss": 0.7838, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8136371970176697, + "eval_runtime": 6.8518, + "eval_samples_per_second": 526.575, + "eval_steps_per_second": 8.319, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00015615167953936243, + "loss": 0.7781, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00017845906233069996, + "loss": 0.7751, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9049334811529933, + "eval_loss": 0.8167554140090942, + "eval_runtime": 6.6206, + "eval_samples_per_second": 544.968, + "eval_steps_per_second": 8.61, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00020076644512203746, + "loss": 0.7705, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8802660753880266, + "eval_loss": 0.8545175194740295, + "eval_runtime": 6.8862, + "eval_samples_per_second": 523.943, + "eval_steps_per_second": 8.277, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00022307382791337494, + "loss": 0.7631, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00024538121070471244, + "loss": 0.7573, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8062652945518494, + "eval_runtime": 6.6472, + "eval_samples_per_second": 542.787, + "eval_steps_per_second": 8.575, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00026596973234740235, + "loss": 0.7591, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00026540295535401116, + "loss": 0.7574, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8888580931263859, + "eval_loss": 0.8478719592094421, + "eval_runtime": 7.1678, + "eval_samples_per_second": 503.365, + "eval_steps_per_second": 7.952, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00026385787349664995, + "loss": 0.7439, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9032705099778271, + "eval_loss": 0.8226559162139893, + "eval_runtime": 6.4816, + "eval_samples_per_second": 556.649, + "eval_steps_per_second": 8.794, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00026134590196569713, + "loss": 0.7516, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00025788559941104864, + "loss": 0.7492, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8259598016738892, + "eval_runtime": 6.6996, + "eval_samples_per_second": 538.539, + "eval_steps_per_second": 8.508, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0002535025308293034, + "loss": 0.7423, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9018847006651884, + "eval_loss": 0.8342524766921997, + "eval_runtime": 6.8789, + "eval_samples_per_second": 524.506, + "eval_steps_per_second": 8.286, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00024822907868749, + "loss": 0.7382, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00024210420367876782, + "loss": 0.7285, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.8182247281074524, + "eval_runtime": 6.6316, + "eval_samples_per_second": 544.064, + "eval_steps_per_second": 8.595, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0002351731568776582, + "loss": 0.7298, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00022748714542142697, + "loss": 0.7312, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.8231853246688843, + "eval_runtime": 6.6718, + "eval_samples_per_second": 540.784, + "eval_steps_per_second": 8.543, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00021910295418759132, + "loss": 0.7254, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.8235146403312683, + "eval_runtime": 6.8666, + "eval_samples_per_second": 525.438, + "eval_steps_per_second": 8.301, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00021008252626262774, + "loss": 0.7298, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00020049250530141348, + "loss": 0.7193, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9024390243902439, + "eval_loss": 0.828417181968689, + "eval_runtime": 6.7994, + "eval_samples_per_second": 530.632, + "eval_steps_per_second": 8.383, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00019040374315848575, + "loss": 0.7195, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0001798907764287805, + "loss": 0.7147, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9021618625277162, + "eval_loss": 0.8194006085395813, + "eval_runtime": 6.6562, + "eval_samples_per_second": 542.055, + "eval_steps_per_second": 8.564, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00016903127576520872, + "loss": 0.7123, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8136492967605591, + "eval_runtime": 6.8677, + "eval_samples_per_second": 525.356, + "eval_steps_per_second": 8.3, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00015790547204155844, + "loss": 0.7096, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00014659556360027614, + "loss": 0.7086, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8072060942649841, + "eval_runtime": 6.4998, + "eval_samples_per_second": 555.094, + "eval_steps_per_second": 8.769, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00013518510896443002, + "loss": 0.708, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00012375840950054813, + "loss": 0.7074, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8188920617103577, + "eval_runtime": 6.899, + "eval_samples_per_second": 522.971, + "eval_steps_per_second": 8.262, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00011239988659326889, + "loss": 0.7093, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9101995565410199, + "eval_loss": 0.8093069791793823, + "eval_runtime": 6.7943, + "eval_samples_per_second": 531.035, + "eval_steps_per_second": 8.389, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00010119345793328764, + "loss": 0.6977, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 9.022191752663539e-05, + "loss": 0.696, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8145430088043213, + "eval_runtime": 6.8149, + "eval_samples_per_second": 529.427, + "eval_steps_per_second": 8.364, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 7.956632400583078e-05, + "loss": 0.6932, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8117313385009766, + "eval_runtime": 6.9616, + "eval_samples_per_second": 518.272, + "eval_steps_per_second": 8.188, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 6.930540176211327e-05, + "loss": 0.6986, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 5.9514959323240176e-05, + "loss": 0.6955, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8096935749053955, + "eval_runtime": 6.5085, + "eval_samples_per_second": 554.355, + "eval_steps_per_second": 8.758, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 5.02673292739201e-05, + "loss": 0.6946, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 4.1630833856796586e-05, + "loss": 0.691, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8032962083816528, + "eval_runtime": 6.5025, + "eval_samples_per_second": 554.862, + "eval_steps_per_second": 8.766, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 3.3669280202165004e-05, + "loss": 0.6887, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.8054246306419373, + "eval_runtime": 6.8243, + "eval_samples_per_second": 528.7, + "eval_steps_per_second": 8.353, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 2.6441488915705853e-05, + "loss": 0.6871, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.0000859507068455e-05, + "loss": 0.6868, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8097283244132996, + "eval_runtime": 6.9667, + "eval_samples_per_second": 517.891, + "eval_steps_per_second": 8.182, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 1.4394975869951762e-05, + "loss": 0.6858, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 9.665254728429276e-06, + "loss": 0.6879, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9240576496674058, + "eval_loss": 0.8027458786964417, + "eval_runtime": 6.9582, + "eval_samples_per_second": 518.523, + "eval_steps_per_second": 8.192, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 5.84663964682447e-06, + "loss": 0.6902, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.8069021105766296, + "eval_runtime": 6.6658, + "eval_samples_per_second": 541.271, + "eval_steps_per_second": 8.551, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 2.9673428638164793e-06, + "loss": 0.6841, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.048636858127158e-06, + "loss": 0.6853, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8012217879295349, + "eval_runtime": 6.9858, + "eval_samples_per_second": 516.479, + "eval_steps_per_second": 8.159, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00026597264097363935, + "metric": "eval/loss", + "warmup_ratio": 0.2458163123166444 + } +} diff --git a/run-w9lpu36i/checkpoint-1232/training_args.bin b/run-w9lpu36i/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..07e61f832c28c4907cfae333db29b858a00b42aa --- /dev/null +++ b/run-w9lpu36i/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d642452bb9496687f98584402bd5dffee8bcf416a8397202457d0c4d60672d4a +size 4792 diff --git a/run-w9lpu36i/checkpoint-1260/model.safetensors b/run-w9lpu36i/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..079d7d12e0924f39cc35d24658bf292439183a73 --- /dev/null +++ b/run-w9lpu36i/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:025af651e61c0907c2132e1833790ff57a6d39bf21c501724f7208d6c13fe100 +size 198025308 diff --git a/run-w9lpu36i/checkpoint-1260/optimizer.pt b/run-w9lpu36i/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..63e5b5ec8254ff36193da1c68d8968afd37ed054 --- /dev/null +++ b/run-w9lpu36i/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17e6eea35ba27ee0cbf0df96afcdc3e8ab79c54edcab549c11985fe7400ca655 +size 395900602 diff --git a/run-w9lpu36i/checkpoint-1260/rng_state.pth b/run-w9lpu36i/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-w9lpu36i/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-w9lpu36i/checkpoint-1260/scheduler.pt b/run-w9lpu36i/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f830c01dd3e6158ff6c6115269566a68d964ca7 --- /dev/null +++ b/run-w9lpu36i/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e446fdf59240ce72d67d89b48a31b84df220a42981ed260f30a96c63652c2440 +size 1064 diff --git a/run-w9lpu36i/checkpoint-1260/trainer_state.json b/run-w9lpu36i/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fa8f0840ab72643df76969e9debedb91cfe92906 --- /dev/null +++ b/run-w9lpu36i/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9259977827050998, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-w9lpu36i/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.2307382791337495e-05, + "loss": 1.3646, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8381374722838137, + "eval_loss": 0.9295976161956787, + "eval_runtime": 6.9758, + "eval_samples_per_second": 517.218, + "eval_steps_per_second": 8.171, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 4.461476558267499e-05, + "loss": 0.9523, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 6.692214837401248e-05, + "loss": 0.8534, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.820833146572113, + "eval_runtime": 6.9984, + "eval_samples_per_second": 515.55, + "eval_steps_per_second": 8.145, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 8.922953116534998e-05, + "loss": 0.8145, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8067387342453003, + "eval_runtime": 6.6337, + "eval_samples_per_second": 543.89, + "eval_steps_per_second": 8.593, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.00011153691395668747, + "loss": 0.7979, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013384429674802496, + "loss": 0.7838, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8136371970176697, + "eval_runtime": 6.8518, + "eval_samples_per_second": 526.575, + "eval_steps_per_second": 8.319, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.00015615167953936243, + "loss": 0.7781, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00017845906233069996, + "loss": 0.7751, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9049334811529933, + "eval_loss": 0.8167554140090942, + "eval_runtime": 6.6206, + "eval_samples_per_second": 544.968, + "eval_steps_per_second": 8.61, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.00020076644512203746, + "loss": 0.7705, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8802660753880266, + "eval_loss": 0.8545175194740295, + "eval_runtime": 6.8862, + "eval_samples_per_second": 523.943, + "eval_steps_per_second": 8.277, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00022307382791337494, + "loss": 0.7631, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00024538121070471244, + "loss": 0.7573, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8062652945518494, + "eval_runtime": 6.6472, + "eval_samples_per_second": 542.787, + "eval_steps_per_second": 8.575, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00026596973234740235, + "loss": 0.7591, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00026540295535401116, + "loss": 0.7574, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8888580931263859, + "eval_loss": 0.8478719592094421, + "eval_runtime": 7.1678, + "eval_samples_per_second": 503.365, + "eval_steps_per_second": 7.952, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00026385787349664995, + "loss": 0.7439, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9032705099778271, + "eval_loss": 0.8226559162139893, + "eval_runtime": 6.4816, + "eval_samples_per_second": 556.649, + "eval_steps_per_second": 8.794, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.00026134590196569713, + "loss": 0.7516, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00025788559941104864, + "loss": 0.7492, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8259598016738892, + "eval_runtime": 6.6996, + "eval_samples_per_second": 538.539, + "eval_steps_per_second": 8.508, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.0002535025308293034, + "loss": 0.7423, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9018847006651884, + "eval_loss": 0.8342524766921997, + "eval_runtime": 6.8789, + "eval_samples_per_second": 524.506, + "eval_steps_per_second": 8.286, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.00024822907868749, + "loss": 0.7382, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.00024210420367876782, + "loss": 0.7285, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9085365853658537, + "eval_loss": 0.8182247281074524, + "eval_runtime": 6.6316, + "eval_samples_per_second": 544.064, + "eval_steps_per_second": 8.595, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0002351731568776582, + "loss": 0.7298, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00022748714542142697, + "loss": 0.7312, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9107538802660754, + "eval_loss": 0.8231853246688843, + "eval_runtime": 6.6718, + "eval_samples_per_second": 540.784, + "eval_steps_per_second": 8.543, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.00021910295418759132, + "loss": 0.7254, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9046563192904656, + "eval_loss": 0.8235146403312683, + "eval_runtime": 6.8666, + "eval_samples_per_second": 525.438, + "eval_steps_per_second": 8.301, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.00021008252626262774, + "loss": 0.7298, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00020049250530141348, + "loss": 0.7193, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9024390243902439, + "eval_loss": 0.828417181968689, + "eval_runtime": 6.7994, + "eval_samples_per_second": 530.632, + "eval_steps_per_second": 8.383, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00019040374315848575, + "loss": 0.7195, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0001798907764287805, + "loss": 0.7147, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9021618625277162, + "eval_loss": 0.8194006085395813, + "eval_runtime": 6.6562, + "eval_samples_per_second": 542.055, + "eval_steps_per_second": 8.564, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00016903127576520872, + "loss": 0.7123, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8136492967605591, + "eval_runtime": 6.8677, + "eval_samples_per_second": 525.356, + "eval_steps_per_second": 8.3, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00015790547204155844, + "loss": 0.7096, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.00014659556360027614, + "loss": 0.7086, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9196230598669624, + "eval_loss": 0.8072060942649841, + "eval_runtime": 6.4998, + "eval_samples_per_second": 555.094, + "eval_steps_per_second": 8.769, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00013518510896443002, + "loss": 0.708, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00012375840950054813, + "loss": 0.7074, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8188920617103577, + "eval_runtime": 6.899, + "eval_samples_per_second": 522.971, + "eval_steps_per_second": 8.262, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00011239988659326889, + "loss": 0.7093, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9101995565410199, + "eval_loss": 0.8093069791793823, + "eval_runtime": 6.7943, + "eval_samples_per_second": 531.035, + "eval_steps_per_second": 8.389, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.00010119345793328764, + "loss": 0.6977, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 9.022191752663539e-05, + "loss": 0.696, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8145430088043213, + "eval_runtime": 6.8149, + "eval_samples_per_second": 529.427, + "eval_steps_per_second": 8.364, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 7.956632400583078e-05, + "loss": 0.6932, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8117313385009766, + "eval_runtime": 6.9616, + "eval_samples_per_second": 518.272, + "eval_steps_per_second": 8.188, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 6.930540176211327e-05, + "loss": 0.6986, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 5.9514959323240176e-05, + "loss": 0.6955, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.8096935749053955, + "eval_runtime": 6.5085, + "eval_samples_per_second": 554.355, + "eval_steps_per_second": 8.758, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 5.02673292739201e-05, + "loss": 0.6946, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 4.1630833856796586e-05, + "loss": 0.691, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8032962083816528, + "eval_runtime": 6.5025, + "eval_samples_per_second": 554.862, + "eval_steps_per_second": 8.766, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 3.3669280202165004e-05, + "loss": 0.6887, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.8054246306419373, + "eval_runtime": 6.8243, + "eval_samples_per_second": 528.7, + "eval_steps_per_second": 8.353, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 2.6441488915705853e-05, + "loss": 0.6871, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.0000859507068455e-05, + "loss": 0.6868, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.8097283244132996, + "eval_runtime": 6.9667, + "eval_samples_per_second": 517.891, + "eval_steps_per_second": 8.182, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 1.4394975869951762e-05, + "loss": 0.6858, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 9.665254728429276e-06, + "loss": 0.6879, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9240576496674058, + "eval_loss": 0.8027458786964417, + "eval_runtime": 6.9582, + "eval_samples_per_second": 518.523, + "eval_steps_per_second": 8.192, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 5.84663964682447e-06, + "loss": 0.6902, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.8069021105766296, + "eval_runtime": 6.6658, + "eval_samples_per_second": 541.271, + "eval_steps_per_second": 8.551, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 2.9673428638164793e-06, + "loss": 0.6841, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.048636858127158e-06, + "loss": 0.6853, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8012217879295349, + "eval_runtime": 6.9858, + "eval_samples_per_second": 516.479, + "eval_steps_per_second": 8.159, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 1.0469718571873196e-07, + "loss": 0.6876, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9259977827050998, + "eval_loss": 0.7955142259597778, + "eval_runtime": 6.8247, + "eval_samples_per_second": 528.671, + "eval_steps_per_second": 8.352, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00026597264097363935, + "metric": "eval/loss", + "warmup_ratio": 0.2458163123166444 + } +} diff --git a/run-w9lpu36i/checkpoint-1260/training_args.bin b/run-w9lpu36i/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..07e61f832c28c4907cfae333db29b858a00b42aa --- /dev/null +++ b/run-w9lpu36i/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d642452bb9496687f98584402bd5dffee8bcf416a8397202457d0c4d60672d4a +size 4792 diff --git a/run-wd18zkoi/checkpoint-510/model.safetensors b/run-wd18zkoi/checkpoint-510/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cbfceb8d5d36525ca356280fdaef0ea6cb10098b --- /dev/null +++ b/run-wd18zkoi/checkpoint-510/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13ed3296eece84f130ae72ffed655e3c27d24d55c1929141f97af0bd4d744571 +size 198025308 diff --git a/run-wd18zkoi/checkpoint-510/optimizer.pt b/run-wd18zkoi/checkpoint-510/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..19e2d130fe83bcb6845649d8aed446dcbad613e2 --- /dev/null +++ b/run-wd18zkoi/checkpoint-510/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b38a7c8480ce9a3cd6c3e13cce1479eb305d9f5d83d94c0076cb5d0d8ffb8812 +size 395900602 diff --git a/run-wd18zkoi/checkpoint-510/rng_state.pth b/run-wd18zkoi/checkpoint-510/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1d8dc24231ef2178e0e7f1fb8387b9f8514188b8 --- /dev/null +++ b/run-wd18zkoi/checkpoint-510/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff11e63c65ca788e4e4341e1970557dcb3db9d0cb5075b86ffbecfbd1dc05a1a +size 14244 diff --git a/run-wd18zkoi/checkpoint-510/scheduler.pt b/run-wd18zkoi/checkpoint-510/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e720b5a441cd2fa2253caaf7ea9d54a363743459 --- /dev/null +++ b/run-wd18zkoi/checkpoint-510/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b3ce46d9c6f565229e37c4302122c1d6690fe5ab0a242c066375d2b11694edb +size 1064 diff --git a/run-wd18zkoi/checkpoint-510/trainer_state.json b/run-wd18zkoi/checkpoint-510/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..373a64be7c8d6aaba94d6c8dd8e68fea16a6933a --- /dev/null +++ b/run-wd18zkoi/checkpoint-510/trainer_state.json @@ -0,0 +1,550 @@ +{ + "best_metric": 0.9156060090918173, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-wd18zkoi/checkpoint-510", + "epoch": 24.0, + "eval_steps": 500, + "global_step": 510, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.3608138770071086e-05, + "loss": 1.4413, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9790328741073608, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.2616, + "eval_samples_per_second": 436.718, + "eval_steps_per_second": 3.51, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 4.721627754014217e-05, + "loss": 1.0525, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 7.082441631021327e-05, + "loss": 0.9025, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8835920177383592, + "eval_f1": 0.8731858034091996, + "eval_loss": 0.9170128703117371, + "eval_precision": 0.8852074108554507, + "eval_recall": 0.8835920177383592, + "eval_runtime": 7.9842, + "eval_samples_per_second": 451.895, + "eval_steps_per_second": 3.632, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 9.443255508028434e-05, + "loss": 0.8344, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.8975011330398239, + "eval_loss": 0.8190191388130188, + "eval_precision": 0.8937273605667937, + "eval_recall": 0.9065964523281597, + "eval_runtime": 8.3092, + "eval_samples_per_second": 434.216, + "eval_steps_per_second": 3.49, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.00011804069385035545, + "loss": 0.8126, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00014164883262042654, + "loss": 0.7889, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.8996744506086427, + "eval_loss": 0.8195714354515076, + "eval_precision": 0.8957418127984982, + "eval_recall": 0.9065964523281597, + "eval_runtime": 7.9346, + "eval_samples_per_second": 454.717, + "eval_steps_per_second": 3.655, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00016525697139049763, + "loss": 0.7893, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00018886511016056868, + "loss": 0.7771, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.9039018520068465, + "eval_loss": 0.8151628375053406, + "eval_precision": 0.9010785100019666, + "eval_recall": 0.9107538802660754, + "eval_runtime": 8.1008, + "eval_samples_per_second": 445.389, + "eval_steps_per_second": 3.58, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0002124732489306398, + "loss": 0.7602, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8076496674057649, + "eval_f1": 0.8287147046625182, + "eval_loss": 0.9707827568054199, + "eval_precision": 0.8843288167413018, + "eval_recall": 0.8076496674057649, + "eval_runtime": 8.2899, + "eval_samples_per_second": 435.231, + "eval_steps_per_second": 3.498, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00022878178472755142, + "loss": 0.765, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0002281756051037738, + "loss": 0.7595, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9033204241277605, + "eval_loss": 0.8103278875350952, + "eval_precision": 0.905298466448934, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.0259, + "eval_samples_per_second": 449.546, + "eval_steps_per_second": 3.613, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00022682279834783535, + "loss": 0.75, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00022473224263026804, + "loss": 0.7451, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8384146341463414, + "eval_f1": 0.8525827575514818, + "eval_loss": 0.9209639430046082, + "eval_precision": 0.8886039738562579, + "eval_recall": 0.8384146341463414, + "eval_runtime": 8.0645, + "eval_samples_per_second": 447.394, + "eval_steps_per_second": 3.596, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.000221917657804322, + "loss": 0.7417, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.899390243902439, + "eval_f1": 0.8832333097918874, + "eval_loss": 0.8427346348762512, + "eval_precision": 0.8894238887562792, + "eval_recall": 0.899390243902439, + "eval_runtime": 8.0706, + "eval_samples_per_second": 447.056, + "eval_steps_per_second": 3.593, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00021839751536561333, + "loss": 0.7346, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00021419491722779726, + "loss": 0.7296, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.9027929080284146, + "eval_loss": 0.8150851130485535, + "eval_precision": 0.9011630945134786, + "eval_recall": 0.907150776053215, + "eval_runtime": 8.1176, + "eval_samples_per_second": 444.464, + "eval_steps_per_second": 3.572, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0002093374441098356, + "loss": 0.726, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9083467120453379, + "eval_loss": 0.8146786093711853, + "eval_precision": 0.9065091045260664, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.592, + "eval_samples_per_second": 419.924, + "eval_steps_per_second": 3.375, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0002038569745298588, + "loss": 0.7238, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00019778947559352558, + "loss": 0.7183, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8672394678492239, + "eval_f1": 0.8760669867082911, + "eval_loss": 0.883409857749939, + "eval_precision": 0.8957881889667982, + "eval_recall": 0.8672394678492239, + "eval_runtime": 7.734, + "eval_samples_per_second": 466.513, + "eval_steps_per_second": 3.75, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00019117476694989022, + "loss": 0.721, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00018405625946388247, + "loss": 0.7131, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8705654101995566, + "eval_f1": 0.8803810186871183, + "eval_loss": 0.8828893303871155, + "eval_precision": 0.8996292526307221, + "eval_recall": 0.8705654101995566, + "eval_runtime": 8.4452, + "eval_samples_per_second": 427.223, + "eval_steps_per_second": 3.434, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00017648067032043424, + "loss": 0.7139, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9038248337028825, + "eval_f1": 0.9001777703178702, + "eval_loss": 0.8273720145225525, + "eval_precision": 0.9000202472169543, + "eval_recall": 0.9038248337028825, + "eval_runtime": 8.2257, + "eval_samples_per_second": 438.625, + "eval_steps_per_second": 3.526, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0001684977164299624, + "loss": 0.7084, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00016015978814731837, + "loss": 0.708, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.908960414460153, + "eval_loss": 0.8162091970443726, + "eval_precision": 0.9094699478691968, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.3601, + "eval_samples_per_second": 431.573, + "eval_steps_per_second": 3.469, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0001515216054455156, + "loss": 0.7076, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.0001426398588006896, + "loss": 0.7077, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9154831134328336, + "eval_loss": 0.805082380771637, + "eval_precision": 0.9144370533895666, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.2674, + "eval_samples_per_second": 436.412, + "eval_steps_per_second": 3.508, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00013357283714508374, + "loss": 0.7034, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8702882483370288, + "eval_f1": 0.8790505571334688, + "eval_loss": 0.8794576525688171, + "eval_precision": 0.8986314638387642, + "eval_recall": 0.8702882483370288, + "eval_runtime": 8.6612, + "eval_samples_per_second": 416.57, + "eval_steps_per_second": 3.348, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0001243800453297219, + "loss": 0.703, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00011512181360727618, + "loss": 0.6965, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.8967786821382503, + "eval_loss": 0.83233243227005, + "eval_precision": 0.9019144327977696, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.1859, + "eval_samples_per_second": 440.761, + "eval_steps_per_second": 3.543, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00010585890169800694, + "loss": 0.6993, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 9.6652100037202e-05, + "loss": 0.6966, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9082594235033259, + "eval_f1": 0.9019690215632581, + "eval_loss": 0.8229565024375916, + "eval_precision": 0.9030486820828715, + "eval_recall": 0.9082594235033259, + "eval_runtime": 8.0397, + "eval_samples_per_second": 448.771, + "eval_steps_per_second": 3.607, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 8.756183082104023e-05, + "loss": 0.7028, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.9040169389480236, + "eval_loss": 0.8183352947235107, + "eval_precision": 0.9011938189784076, + "eval_recall": 0.9090909090909091, + "eval_runtime": 8.1914, + "eval_samples_per_second": 440.463, + "eval_steps_per_second": 3.54, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 7.86477514691264e-05, + "loss": 0.6948, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 6.996836310608535e-05, + "loss": 0.6908, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.907688690479266, + "eval_loss": 0.8171535730361938, + "eval_precision": 0.9059311612589884, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.308, + "eval_samples_per_second": 434.283, + "eval_steps_per_second": 3.491, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 6.158062663166334e-05, + "loss": 0.6927, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9083562572776112, + "eval_loss": 0.8181624412536621, + "eval_precision": 0.9078655892824762, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.0853, + "eval_samples_per_second": 446.244, + "eval_steps_per_second": 3.587, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 5.353958889898312e-05, + "loss": 0.6929, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 4.5898021454263e-05, + "loss": 0.6892, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9067940818603066, + "eval_loss": 0.8166985511779785, + "eval_precision": 0.9051251314485794, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.1559, + "eval_samples_per_second": 442.377, + "eval_steps_per_second": 3.556, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 3.8706074208869544e-05, + "loss": 0.6922, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 3.201094631657817e-05, + "loss": 0.6909, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9156060090918173, + "eval_loss": 0.8066702485084534, + "eval_precision": 0.9137922568257758, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.3148, + "eval_samples_per_second": 433.923, + "eval_steps_per_second": 3.488, + "step": 510 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.01931914331760705, + "learning_rate": 0.00022881734500222747, + "metric": "eval/loss", + "weight_decay": 0.17233968057266494 + } +} diff --git a/run-wd18zkoi/checkpoint-510/training_args.bin b/run-wd18zkoi/checkpoint-510/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8f245246e705b6ba4bb4372ec204cfca20b241d8 --- /dev/null +++ b/run-wd18zkoi/checkpoint-510/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca8c3acf857394ea479c20342273a4f8ace62e1bdaf5c1d4f54a2b0534874a29 +size 4792 diff --git a/run-wd18zkoi/checkpoint-630/model.safetensors b/run-wd18zkoi/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c154a907c1728be609ee0dda3dc0967cebc97337 --- /dev/null +++ b/run-wd18zkoi/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3abcd9129de8e896d82625fc6143dddb406e1887a8436af4560856177b592b65 +size 198025308 diff --git a/run-wd18zkoi/checkpoint-630/optimizer.pt b/run-wd18zkoi/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ff09dde69b579075fd34e92873c86b6c911c039 --- /dev/null +++ b/run-wd18zkoi/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0cd3ece7a621da8519eab71761a61479580d8635934106e4a0409488cba2ab9 +size 395900602 diff --git a/run-wd18zkoi/checkpoint-630/rng_state.pth b/run-wd18zkoi/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-wd18zkoi/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-wd18zkoi/checkpoint-630/scheduler.pt b/run-wd18zkoi/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b2b47dad35448c8bb0d2ad52783837de1432c6a --- /dev/null +++ b/run-wd18zkoi/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4feb12afb886d1ebd180c4510e53b9ff034c097aa500715b4272c269b8166651 +size 1064 diff --git a/run-wd18zkoi/checkpoint-630/trainer_state.json b/run-wd18zkoi/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0506b58d60ad634c6e352233868501d2b33f999e --- /dev/null +++ b/run-wd18zkoi/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9156060090918173, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-wd18zkoi/checkpoint-510", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 2.3608138770071086e-05, + "loss": 1.4413, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7507041432190336, + "eval_loss": 0.9790328741073608, + "eval_precision": 0.6863075433503277, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.2616, + "eval_samples_per_second": 436.718, + "eval_steps_per_second": 3.51, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 4.721627754014217e-05, + "loss": 1.0525, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 7.082441631021327e-05, + "loss": 0.9025, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8835920177383592, + "eval_f1": 0.8731858034091996, + "eval_loss": 0.9170128703117371, + "eval_precision": 0.8852074108554507, + "eval_recall": 0.8835920177383592, + "eval_runtime": 7.9842, + "eval_samples_per_second": 451.895, + "eval_steps_per_second": 3.632, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 9.443255508028434e-05, + "loss": 0.8344, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.8975011330398239, + "eval_loss": 0.8190191388130188, + "eval_precision": 0.8937273605667937, + "eval_recall": 0.9065964523281597, + "eval_runtime": 8.3092, + "eval_samples_per_second": 434.216, + "eval_steps_per_second": 3.49, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 0.00011804069385035545, + "loss": 0.8126, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00014164883262042654, + "loss": 0.7889, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9065964523281597, + "eval_f1": 0.8996744506086427, + "eval_loss": 0.8195714354515076, + "eval_precision": 0.8957418127984982, + "eval_recall": 0.9065964523281597, + "eval_runtime": 7.9346, + "eval_samples_per_second": 454.717, + "eval_steps_per_second": 3.655, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00016525697139049763, + "loss": 0.7893, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00018886511016056868, + "loss": 0.7771, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.9039018520068465, + "eval_loss": 0.8151628375053406, + "eval_precision": 0.9010785100019666, + "eval_recall": 0.9107538802660754, + "eval_runtime": 8.1008, + "eval_samples_per_second": 445.389, + "eval_steps_per_second": 3.58, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0002124732489306398, + "loss": 0.7602, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.8076496674057649, + "eval_f1": 0.8287147046625182, + "eval_loss": 0.9707827568054199, + "eval_precision": 0.8843288167413018, + "eval_recall": 0.8076496674057649, + "eval_runtime": 8.2899, + "eval_samples_per_second": 435.231, + "eval_steps_per_second": 3.498, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00022878178472755142, + "loss": 0.765, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.0002281756051037738, + "loss": 0.7595, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9135254988913526, + "eval_f1": 0.9033204241277605, + "eval_loss": 0.8103278875350952, + "eval_precision": 0.905298466448934, + "eval_recall": 0.9135254988913526, + "eval_runtime": 8.0259, + "eval_samples_per_second": 449.546, + "eval_steps_per_second": 3.613, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00022682279834783535, + "loss": 0.75, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00022473224263026804, + "loss": 0.7451, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8384146341463414, + "eval_f1": 0.8525827575514818, + "eval_loss": 0.9209639430046082, + "eval_precision": 0.8886039738562579, + "eval_recall": 0.8384146341463414, + "eval_runtime": 8.0645, + "eval_samples_per_second": 447.394, + "eval_steps_per_second": 3.596, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.000221917657804322, + "loss": 0.7417, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.899390243902439, + "eval_f1": 0.8832333097918874, + "eval_loss": 0.8427346348762512, + "eval_precision": 0.8894238887562792, + "eval_recall": 0.899390243902439, + "eval_runtime": 8.0706, + "eval_samples_per_second": 447.056, + "eval_steps_per_second": 3.593, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00021839751536561333, + "loss": 0.7346, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00021419491722779726, + "loss": 0.7296, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.9027929080284146, + "eval_loss": 0.8150851130485535, + "eval_precision": 0.9011630945134786, + "eval_recall": 0.907150776053215, + "eval_runtime": 8.1176, + "eval_samples_per_second": 444.464, + "eval_steps_per_second": 3.572, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.0002093374441098356, + "loss": 0.726, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9083467120453379, + "eval_loss": 0.8146786093711853, + "eval_precision": 0.9065091045260664, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.592, + "eval_samples_per_second": 419.924, + "eval_steps_per_second": 3.375, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.0002038569745298588, + "loss": 0.7238, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00019778947559352558, + "loss": 0.7183, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8672394678492239, + "eval_f1": 0.8760669867082911, + "eval_loss": 0.883409857749939, + "eval_precision": 0.8957881889667982, + "eval_recall": 0.8672394678492239, + "eval_runtime": 7.734, + "eval_samples_per_second": 466.513, + "eval_steps_per_second": 3.75, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00019117476694989022, + "loss": 0.721, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00018405625946388247, + "loss": 0.7131, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8705654101995566, + "eval_f1": 0.8803810186871183, + "eval_loss": 0.8828893303871155, + "eval_precision": 0.8996292526307221, + "eval_recall": 0.8705654101995566, + "eval_runtime": 8.4452, + "eval_samples_per_second": 427.223, + "eval_steps_per_second": 3.434, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00017648067032043424, + "loss": 0.7139, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9038248337028825, + "eval_f1": 0.9001777703178702, + "eval_loss": 0.8273720145225525, + "eval_precision": 0.9000202472169543, + "eval_recall": 0.9038248337028825, + "eval_runtime": 8.2257, + "eval_samples_per_second": 438.625, + "eval_steps_per_second": 3.526, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0001684977164299624, + "loss": 0.7084, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00016015978814731837, + "loss": 0.708, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.908960414460153, + "eval_loss": 0.8162091970443726, + "eval_precision": 0.9094699478691968, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.3601, + "eval_samples_per_second": 431.573, + "eval_steps_per_second": 3.469, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.0001515216054455156, + "loss": 0.7076, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.0001426398588006896, + "loss": 0.7077, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9154831134328336, + "eval_loss": 0.805082380771637, + "eval_precision": 0.9144370533895666, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.2674, + "eval_samples_per_second": 436.412, + "eval_steps_per_second": 3.508, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.00013357283714508374, + "loss": 0.7034, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8702882483370288, + "eval_f1": 0.8790505571334688, + "eval_loss": 0.8794576525688171, + "eval_precision": 0.8986314638387642, + "eval_recall": 0.8702882483370288, + "eval_runtime": 8.6612, + "eval_samples_per_second": 416.57, + "eval_steps_per_second": 3.348, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 0.0001243800453297219, + "loss": 0.703, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 0.00011512181360727618, + "loss": 0.6965, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.8967786821382503, + "eval_loss": 0.83233243227005, + "eval_precision": 0.9019144327977696, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.1859, + "eval_samples_per_second": 440.761, + "eval_steps_per_second": 3.543, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 0.00010585890169800694, + "loss": 0.6993, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 9.6652100037202e-05, + "loss": 0.6966, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9082594235033259, + "eval_f1": 0.9019690215632581, + "eval_loss": 0.8229565024375916, + "eval_precision": 0.9030486820828715, + "eval_recall": 0.9082594235033259, + "eval_runtime": 8.0397, + "eval_samples_per_second": 448.771, + "eval_steps_per_second": 3.607, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 8.756183082104023e-05, + "loss": 0.7028, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9090909090909091, + "eval_f1": 0.9040169389480236, + "eval_loss": 0.8183352947235107, + "eval_precision": 0.9011938189784076, + "eval_recall": 0.9090909090909091, + "eval_runtime": 8.1914, + "eval_samples_per_second": 440.463, + "eval_steps_per_second": 3.54, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 7.86477514691264e-05, + "loss": 0.6948, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 6.996836310608535e-05, + "loss": 0.6908, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.907688690479266, + "eval_loss": 0.8171535730361938, + "eval_precision": 0.9059311612589884, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.308, + "eval_samples_per_second": 434.283, + "eval_steps_per_second": 3.491, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 6.158062663166334e-05, + "loss": 0.6927, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.9083562572776112, + "eval_loss": 0.8181624412536621, + "eval_precision": 0.9078655892824762, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.0853, + "eval_samples_per_second": 446.244, + "eval_steps_per_second": 3.587, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 5.353958889898312e-05, + "loss": 0.6929, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 4.5898021454263e-05, + "loss": 0.6892, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9067940818603066, + "eval_loss": 0.8166985511779785, + "eval_precision": 0.9051251314485794, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.1559, + "eval_samples_per_second": 442.377, + "eval_steps_per_second": 3.556, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 3.8706074208869544e-05, + "loss": 0.6922, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 3.201094631657817e-05, + "loss": 0.6909, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9156060090918173, + "eval_loss": 0.8066702485084534, + "eval_precision": 0.9137922568257758, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.3148, + "eval_samples_per_second": 433.923, + "eval_steps_per_second": 3.488, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 2.5856576416001353e-05, + "loss": 0.6916, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.9110865278840192, + "eval_loss": 0.8066620230674744, + "eval_precision": 0.9099384785972416, + "eval_recall": 0.9157427937915743, + "eval_runtime": 8.366, + "eval_samples_per_second": 431.269, + "eval_steps_per_second": 3.466, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 2.0283354271053817e-05, + "loss": 0.6884, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.5327855701895105e-05, + "loss": 0.6863, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9151705958994395, + "eval_loss": 0.8017975091934204, + "eval_precision": 0.9126798150268394, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.2952, + "eval_samples_per_second": 434.95, + "eval_steps_per_second": 3.496, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 1.1022602545938595e-05, + "loss": 0.6854, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 7.395849224249481e-06, + "loss": 0.682, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9190687361419069, + "eval_f1": 0.9144613619703986, + "eval_loss": 0.8056851625442505, + "eval_precision": 0.91216225742071, + "eval_recall": 0.9190687361419069, + "eval_runtime": 8.3133, + "eval_samples_per_second": 434.002, + "eval_steps_per_second": 3.488, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 4.471397314049577e-06, + "loss": 0.6858, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9126940133037694, + "eval_f1": 0.907663146087261, + "eval_loss": 0.8100470900535583, + "eval_precision": 0.9057251723714131, + "eval_recall": 0.9126940133037694, + "eval_runtime": 8.2551, + "eval_samples_per_second": 437.066, + "eval_steps_per_second": 3.513, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 2.268439344247802e-06, + "loss": 0.6871, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 8.014328391320596e-07, + "loss": 0.6861, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9187915742793792, + "eval_f1": 0.9141936038873829, + "eval_loss": 0.8045759797096252, + "eval_precision": 0.9122218219724145, + "eval_recall": 0.9187915742793792, + "eval_runtime": 8.2125, + "eval_samples_per_second": 439.328, + "eval_steps_per_second": 3.531, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 8.000543684570066e-08, + "loss": 0.6886, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.913837025844137, + "eval_loss": 0.8077741265296936, + "eval_precision": 0.9119795446583439, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.1183, + "eval_samples_per_second": 444.426, + "eval_steps_per_second": 3.572, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.01931914331760705, + "learning_rate": 0.00022881734500222747, + "metric": "eval/loss", + "weight_decay": 0.17233968057266494 + } +} diff --git a/run-wd18zkoi/checkpoint-630/training_args.bin b/run-wd18zkoi/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8f245246e705b6ba4bb4372ec204cfca20b241d8 --- /dev/null +++ b/run-wd18zkoi/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca8c3acf857394ea479c20342273a4f8ace62e1bdaf5c1d4f54a2b0534874a29 +size 4792 diff --git a/run-wxq1fn3t/checkpoint-510/model.safetensors b/run-wxq1fn3t/checkpoint-510/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e26736740d4c9355cc0e8cc333772fd117630ce2 --- /dev/null +++ b/run-wxq1fn3t/checkpoint-510/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79d3aa09af483b7b2c0195cc6d7e365a8007aac927d1401d0fd78a64c9fdac57 +size 198025308 diff --git a/run-wxq1fn3t/checkpoint-510/optimizer.pt b/run-wxq1fn3t/checkpoint-510/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2602dc4b80151e6556f79cf0eb7bc905841fc96a --- /dev/null +++ b/run-wxq1fn3t/checkpoint-510/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07fee5fc5b8b3fa1dc50958b687a529fc731ac0a51a2929a9e93dcb98f652968 +size 395900602 diff --git a/run-wxq1fn3t/checkpoint-510/rng_state.pth b/run-wxq1fn3t/checkpoint-510/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1d8dc24231ef2178e0e7f1fb8387b9f8514188b8 --- /dev/null +++ b/run-wxq1fn3t/checkpoint-510/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff11e63c65ca788e4e4341e1970557dcb3db9d0cb5075b86ffbecfbd1dc05a1a +size 14244 diff --git a/run-wxq1fn3t/checkpoint-510/scheduler.pt b/run-wxq1fn3t/checkpoint-510/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f94f992cbd673a740fb21ced1bde2a724505b60 --- /dev/null +++ b/run-wxq1fn3t/checkpoint-510/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05e8bbe9f277132b3a976a67855049a38de716c6d1980fd85c25ca64ef101897 +size 1064 diff --git a/run-wxq1fn3t/checkpoint-510/trainer_state.json b/run-wxq1fn3t/checkpoint-510/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..421910229486b7d17a040993e3ad28d159477c0d --- /dev/null +++ b/run-wxq1fn3t/checkpoint-510/trainer_state.json @@ -0,0 +1,550 @@ +{ + "best_metric": 0.9211219837609808, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-wxq1fn3t/checkpoint-510", + "epoch": 24.0, + "eval_steps": 500, + "global_step": 510, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.4870254877672433e-05, + "loss": 1.476, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.751595348408122, + "eval_loss": 1.0507758855819702, + "eval_precision": 0.7195837728456721, + "eval_recall": 0.8287139689578714, + "eval_runtime": 8.0097, + "eval_samples_per_second": 450.452, + "eval_steps_per_second": 3.621, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 2.9740509755344865e-05, + "loss": 1.1445, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 4.46107646330173e-05, + "loss": 0.9418, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8669623059866962, + "eval_f1": 0.8352110361562193, + "eval_loss": 0.9198800325393677, + "eval_precision": 0.8541170889802767, + "eval_recall": 0.8669623059866962, + "eval_runtime": 8.199, + "eval_samples_per_second": 440.056, + "eval_steps_per_second": 3.537, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 5.948101951068973e-05, + "loss": 0.8671, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9018847006651884, + "eval_f1": 0.89392861990502, + "eval_loss": 0.840576171875, + "eval_precision": 0.8920500751900134, + "eval_recall": 0.9018847006651884, + "eval_runtime": 8.0596, + "eval_samples_per_second": 447.663, + "eval_steps_per_second": 3.598, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 7.435127438836218e-05, + "loss": 0.8375, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 8.92215292660346e-05, + "loss": 0.8034, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9076618717631103, + "eval_loss": 0.8246948719024658, + "eval_precision": 0.9054196761648374, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.3064, + "eval_samples_per_second": 434.362, + "eval_steps_per_second": 3.491, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00010409178414370703, + "loss": 0.7964, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00011896203902137946, + "loss": 0.7818, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8938470066518847, + "eval_f1": 0.893832757523224, + "eval_loss": 0.8284726738929749, + "eval_precision": 0.8978726181326273, + "eval_recall": 0.8938470066518847, + "eval_runtime": 8.2276, + "eval_samples_per_second": 438.524, + "eval_steps_per_second": 3.525, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0001338322938990519, + "loss": 0.7645, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9040236128291175, + "eval_loss": 0.8089054226875305, + "eval_precision": 0.9058324946098152, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.8251, + "eval_samples_per_second": 461.078, + "eval_steps_per_second": 3.706, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00014410468709123196, + "loss": 0.768, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00014372286768585588, + "loss": 0.759, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9117556268134561, + "eval_loss": 0.8041640520095825, + "eval_precision": 0.9094891926364399, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.107, + "eval_samples_per_second": 445.05, + "eval_steps_per_second": 3.577, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0001428707640339346, + "loss": 0.7545, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00014155396830264162, + "loss": 0.7505, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.901892903241517, + "eval_loss": 0.8154520392417908, + "eval_precision": 0.9006133136945058, + "eval_recall": 0.9107538802660754, + "eval_runtime": 8.0317, + "eval_samples_per_second": 449.221, + "eval_steps_per_second": 3.611, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00013978112233014564, + "loss": 0.7439, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.8999008811088898, + "eval_loss": 0.8328478336334229, + "eval_precision": 0.9042675087977353, + "eval_recall": 0.9121396895787139, + "eval_runtime": 8.1533, + "eval_samples_per_second": 442.522, + "eval_steps_per_second": 3.557, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00013756386091114426, + "loss": 0.7399, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00013491673544028835, + "loss": 0.7362, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8619733924611973, + "eval_f1": 0.8710253179805159, + "eval_loss": 0.8900936245918274, + "eval_precision": 0.891380863020852, + "eval_recall": 0.8619733924611973, + "eval_runtime": 8.3712, + "eval_samples_per_second": 430.999, + "eval_steps_per_second": 3.464, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00013185711841460807, + "loss": 0.7277, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8819290465631929, + "eval_f1": 0.8862303233739032, + "eval_loss": 0.8535647392272949, + "eval_precision": 0.8971120799598773, + "eval_recall": 0.8819290465631929, + "eval_runtime": 8.1065, + "eval_samples_per_second": 445.077, + "eval_steps_per_second": 3.577, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00012840508942167024, + "loss": 0.7262, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0001245833033617008, + "loss": 0.7211, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.905210643015521, + "eval_f1": 0.8910755661507798, + "eval_loss": 0.8297373056411743, + "eval_precision": 0.8984650170315818, + "eval_recall": 0.905210643015521, + "eval_runtime": 8.0921, + "eval_samples_per_second": 445.866, + "eval_steps_per_second": 3.584, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00012041684176850233, + "loss": 0.7219, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00011593304820491359, + "loss": 0.7159, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.909906337604824, + "eval_loss": 0.8070564866065979, + "eval_precision": 0.9066751264475545, + "eval_recall": 0.9157427937915743, + "eval_runtime": 8.0376, + "eval_samples_per_second": 448.892, + "eval_steps_per_second": 3.608, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00011116134881307441, + "loss": 0.7169, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.9059580201118518, + "eval_loss": 0.8160692453384399, + "eval_precision": 0.90514443417894, + "eval_recall": 0.9113082039911308, + "eval_runtime": 8.2, + "eval_samples_per_second": 440.0, + "eval_steps_per_second": 3.537, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00010613305919718507, + "loss": 0.7134, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00010088117890614521, + "loss": 0.712, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8977272727272727, + "eval_f1": 0.8986847573684905, + "eval_loss": 0.8256596326828003, + "eval_precision": 0.9022370889767114, + "eval_recall": 0.8977272727272727, + "eval_runtime": 8.4259, + "eval_samples_per_second": 428.204, + "eval_steps_per_second": 3.442, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 9.544017486483758e-05, + "loss": 0.7121, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 8.984575517534857e-05, + "loss": 0.7095, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9060421286031042, + "eval_f1": 0.890904362080467, + "eval_loss": 0.8316482305526733, + "eval_precision": 0.8983071674221443, + "eval_recall": 0.9060421286031042, + "eval_runtime": 8.0509, + "eval_samples_per_second": 448.148, + "eval_steps_per_second": 3.602, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 8.413463477261855e-05, + "loss": 0.7034, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8841463414634146, + "eval_f1": 0.8912648091409628, + "eval_loss": 0.8533964157104492, + "eval_precision": 0.9043909248696138, + "eval_recall": 0.8841463414634146, + "eval_runtime": 8.0535, + "eval_samples_per_second": 448.005, + "eval_steps_per_second": 3.601, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 7.834429447247128e-05, + "loss": 0.7064, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 7.251273499333723e-05, + "loss": 0.7016, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9171411496911781, + "eval_loss": 0.8001407384872437, + "eval_precision": 0.9138976097144181, + "eval_recall": 0.9223946784922394, + "eval_runtime": 8.2087, + "eval_samples_per_second": 439.531, + "eval_steps_per_second": 3.533, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 6.667822756597149e-05, + "loss": 0.7029, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 6.087906276785915e-05, + "loss": 0.6979, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.905143263853918, + "eval_loss": 0.8209952712059021, + "eval_precision": 0.9064952868274772, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.1302, + "eval_samples_per_second": 443.776, + "eval_steps_per_second": 3.567, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 5.515329923065262e-05, + "loss": 0.7046, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9108735592537623, + "eval_loss": 0.8073176741600037, + "eval_precision": 0.9097380052113129, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.2065, + "eval_samples_per_second": 439.653, + "eval_steps_per_second": 3.534, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.9538513869817634e-05, + "loss": 0.696, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 4.407155527567617e-05, + "loss": 0.6935, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9119617556263947, + "eval_loss": 0.8143486380577087, + "eval_precision": 0.9110337761316989, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.3756, + "eval_samples_per_second": 430.775, + "eval_steps_per_second": 3.462, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.878830188428527e-05, + "loss": 0.697, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9111383703238648, + "eval_loss": 0.8114368319511414, + "eval_precision": 0.9113995845090351, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.0257, + "eval_samples_per_second": 449.558, + "eval_steps_per_second": 3.613, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 3.372342651522305e-05, + "loss": 0.6965, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.8910168821568357e-05, + "loss": 0.6918, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9105854879244112, + "eval_loss": 0.8113408088684082, + "eval_precision": 0.9098828267812555, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.1754, + "eval_samples_per_second": 441.326, + "eval_steps_per_second": 3.547, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.4380117145433924e-05, + "loss": 0.6939, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 2.016300121068687e-05, + "loss": 0.6931, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9240576496674058, + "eval_f1": 0.9211219837609808, + "eval_loss": 0.7976791262626648, + "eval_precision": 0.9198585512225491, + "eval_recall": 0.9240576496674058, + "eval_runtime": 8.2334, + "eval_samples_per_second": 438.216, + "eval_steps_per_second": 3.522, + "step": 510 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.1692906290585636, + "learning_rate": 0.0001441270857374405, + "metric": "eval/loss", + "weight_decay": 0.1625015259041905 + } +} diff --git a/run-wxq1fn3t/checkpoint-510/training_args.bin b/run-wxq1fn3t/checkpoint-510/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c3e20c512d4f7ae3f42bc09a52d1259706f3cf7f --- /dev/null +++ b/run-wxq1fn3t/checkpoint-510/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10c8ee0e17d198bb43e0df998003bbfd67aeea5ac3e4a4943e73095410cb61ca +size 4792 diff --git a/run-wxq1fn3t/checkpoint-630/model.safetensors b/run-wxq1fn3t/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..096d5258404736d442efdfd6a45c54ecb7a0d1f0 --- /dev/null +++ b/run-wxq1fn3t/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ff1132315384102b3096e1a25be6707498af407c6419e3baba53dfa34a52863 +size 198025308 diff --git a/run-wxq1fn3t/checkpoint-630/optimizer.pt b/run-wxq1fn3t/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e6071225b755df2dec97ebaf4351e024dbe2b96 --- /dev/null +++ b/run-wxq1fn3t/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ec50644ae6111329ec2eaae13e95d2efb8c0645df4eb5357d996dd5dfc59663 +size 395900602 diff --git a/run-wxq1fn3t/checkpoint-630/rng_state.pth b/run-wxq1fn3t/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-wxq1fn3t/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-wxq1fn3t/checkpoint-630/scheduler.pt b/run-wxq1fn3t/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f4981b4c90062e8ac7a3de4e1544da0d81177ef --- /dev/null +++ b/run-wxq1fn3t/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3186f27e768accc5ca0c6f56436217d592dd05e3e60f3803c18f600c545ab91 +size 1064 diff --git a/run-wxq1fn3t/checkpoint-630/trainer_state.json b/run-wxq1fn3t/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..287356198f71c4d404c26a16855062cba1fb2a5e --- /dev/null +++ b/run-wxq1fn3t/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9211219837609808, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-wxq1fn3t/checkpoint-510", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.4870254877672433e-05, + "loss": 1.476, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8287139689578714, + "eval_f1": 0.751595348408122, + "eval_loss": 1.0507758855819702, + "eval_precision": 0.7195837728456721, + "eval_recall": 0.8287139689578714, + "eval_runtime": 8.0097, + "eval_samples_per_second": 450.452, + "eval_steps_per_second": 3.621, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 2.9740509755344865e-05, + "loss": 1.1445, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 4.46107646330173e-05, + "loss": 0.9418, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8669623059866962, + "eval_f1": 0.8352110361562193, + "eval_loss": 0.9198800325393677, + "eval_precision": 0.8541170889802767, + "eval_recall": 0.8669623059866962, + "eval_runtime": 8.199, + "eval_samples_per_second": 440.056, + "eval_steps_per_second": 3.537, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 5.948101951068973e-05, + "loss": 0.8671, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9018847006651884, + "eval_f1": 0.89392861990502, + "eval_loss": 0.840576171875, + "eval_precision": 0.8920500751900134, + "eval_recall": 0.9018847006651884, + "eval_runtime": 8.0596, + "eval_samples_per_second": 447.663, + "eval_steps_per_second": 3.598, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 7.435127438836218e-05, + "loss": 0.8375, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 8.92215292660346e-05, + "loss": 0.8034, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9076618717631103, + "eval_loss": 0.8246948719024658, + "eval_precision": 0.9054196761648374, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.3064, + "eval_samples_per_second": 434.362, + "eval_steps_per_second": 3.491, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00010409178414370703, + "loss": 0.7964, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00011896203902137946, + "loss": 0.7818, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8938470066518847, + "eval_f1": 0.893832757523224, + "eval_loss": 0.8284726738929749, + "eval_precision": 0.8978726181326273, + "eval_recall": 0.8938470066518847, + "eval_runtime": 8.2276, + "eval_samples_per_second": 438.524, + "eval_steps_per_second": 3.525, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0001338322938990519, + "loss": 0.7645, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9040236128291175, + "eval_loss": 0.8089054226875305, + "eval_precision": 0.9058324946098152, + "eval_recall": 0.9132483370288248, + "eval_runtime": 7.8251, + "eval_samples_per_second": 461.078, + "eval_steps_per_second": 3.706, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00014410468709123196, + "loss": 0.768, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00014372286768585588, + "loss": 0.759, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9117556268134561, + "eval_loss": 0.8041640520095825, + "eval_precision": 0.9094891926364399, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.107, + "eval_samples_per_second": 445.05, + "eval_steps_per_second": 3.577, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.0001428707640339346, + "loss": 0.7545, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00014155396830264162, + "loss": 0.7505, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9107538802660754, + "eval_f1": 0.901892903241517, + "eval_loss": 0.8154520392417908, + "eval_precision": 0.9006133136945058, + "eval_recall": 0.9107538802660754, + "eval_runtime": 8.0317, + "eval_samples_per_second": 449.221, + "eval_steps_per_second": 3.611, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00013978112233014564, + "loss": 0.7439, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9121396895787139, + "eval_f1": 0.8999008811088898, + "eval_loss": 0.8328478336334229, + "eval_precision": 0.9042675087977353, + "eval_recall": 0.9121396895787139, + "eval_runtime": 8.1533, + "eval_samples_per_second": 442.522, + "eval_steps_per_second": 3.557, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00013756386091114426, + "loss": 0.7399, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00013491673544028835, + "loss": 0.7362, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8619733924611973, + "eval_f1": 0.8710253179805159, + "eval_loss": 0.8900936245918274, + "eval_precision": 0.891380863020852, + "eval_recall": 0.8619733924611973, + "eval_runtime": 8.3712, + "eval_samples_per_second": 430.999, + "eval_steps_per_second": 3.464, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00013185711841460807, + "loss": 0.7277, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8819290465631929, + "eval_f1": 0.8862303233739032, + "eval_loss": 0.8535647392272949, + "eval_precision": 0.8971120799598773, + "eval_recall": 0.8819290465631929, + "eval_runtime": 8.1065, + "eval_samples_per_second": 445.077, + "eval_steps_per_second": 3.577, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00012840508942167024, + "loss": 0.7262, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.0001245833033617008, + "loss": 0.7211, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.905210643015521, + "eval_f1": 0.8910755661507798, + "eval_loss": 0.8297373056411743, + "eval_precision": 0.8984650170315818, + "eval_recall": 0.905210643015521, + "eval_runtime": 8.0921, + "eval_samples_per_second": 445.866, + "eval_steps_per_second": 3.584, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00012041684176850233, + "loss": 0.7219, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00011593304820491359, + "loss": 0.7159, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9157427937915743, + "eval_f1": 0.909906337604824, + "eval_loss": 0.8070564866065979, + "eval_precision": 0.9066751264475545, + "eval_recall": 0.9157427937915743, + "eval_runtime": 8.0376, + "eval_samples_per_second": 448.892, + "eval_steps_per_second": 3.608, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.00011116134881307441, + "loss": 0.7169, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9113082039911308, + "eval_f1": 0.9059580201118518, + "eval_loss": 0.8160692453384399, + "eval_precision": 0.90514443417894, + "eval_recall": 0.9113082039911308, + "eval_runtime": 8.2, + "eval_samples_per_second": 440.0, + "eval_steps_per_second": 3.537, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00010613305919718507, + "loss": 0.7134, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00010088117890614521, + "loss": 0.712, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8977272727272727, + "eval_f1": 0.8986847573684905, + "eval_loss": 0.8256596326828003, + "eval_precision": 0.9022370889767114, + "eval_recall": 0.8977272727272727, + "eval_runtime": 8.4259, + "eval_samples_per_second": 428.204, + "eval_steps_per_second": 3.442, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 9.544017486483758e-05, + "loss": 0.7121, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 8.984575517534857e-05, + "loss": 0.7095, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9060421286031042, + "eval_f1": 0.890904362080467, + "eval_loss": 0.8316482305526733, + "eval_precision": 0.8983071674221443, + "eval_recall": 0.9060421286031042, + "eval_runtime": 8.0509, + "eval_samples_per_second": 448.148, + "eval_steps_per_second": 3.602, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 8.413463477261855e-05, + "loss": 0.7034, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8841463414634146, + "eval_f1": 0.8912648091409628, + "eval_loss": 0.8533964157104492, + "eval_precision": 0.9043909248696138, + "eval_recall": 0.8841463414634146, + "eval_runtime": 8.0535, + "eval_samples_per_second": 448.005, + "eval_steps_per_second": 3.601, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 7.834429447247128e-05, + "loss": 0.7064, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 7.251273499333723e-05, + "loss": 0.7016, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9171411496911781, + "eval_loss": 0.8001407384872437, + "eval_precision": 0.9138976097144181, + "eval_recall": 0.9223946784922394, + "eval_runtime": 8.2087, + "eval_samples_per_second": 439.531, + "eval_steps_per_second": 3.533, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 6.667822756597149e-05, + "loss": 0.7029, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 6.087906276785915e-05, + "loss": 0.6979, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.905143263853918, + "eval_loss": 0.8209952712059021, + "eval_precision": 0.9064952868274772, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.1302, + "eval_samples_per_second": 443.776, + "eval_steps_per_second": 3.567, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 5.515329923065262e-05, + "loss": 0.7046, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9143569844789357, + "eval_f1": 0.9108735592537623, + "eval_loss": 0.8073176741600037, + "eval_precision": 0.9097380052113129, + "eval_recall": 0.9143569844789357, + "eval_runtime": 8.2065, + "eval_samples_per_second": 439.653, + "eval_steps_per_second": 3.534, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 4.9538513869817634e-05, + "loss": 0.696, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 4.407155527567617e-05, + "loss": 0.6935, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9119617556263947, + "eval_loss": 0.8143486380577087, + "eval_precision": 0.9110337761316989, + "eval_recall": 0.9168514412416852, + "eval_runtime": 8.3756, + "eval_samples_per_second": 430.775, + "eval_steps_per_second": 3.462, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 3.878830188428527e-05, + "loss": 0.697, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.916019955654102, + "eval_f1": 0.9111383703238648, + "eval_loss": 0.8114368319511414, + "eval_precision": 0.9113995845090351, + "eval_recall": 0.916019955654102, + "eval_runtime": 8.0257, + "eval_samples_per_second": 449.558, + "eval_steps_per_second": 3.613, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 3.372342651522305e-05, + "loss": 0.6965, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 2.8910168821568357e-05, + "loss": 0.6918, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9151884700665188, + "eval_f1": 0.9105854879244112, + "eval_loss": 0.8113408088684082, + "eval_precision": 0.9098828267812555, + "eval_recall": 0.9151884700665188, + "eval_runtime": 8.1754, + "eval_samples_per_second": 441.326, + "eval_steps_per_second": 3.547, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.4380117145433924e-05, + "loss": 0.6939, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 2.016300121068687e-05, + "loss": 0.6931, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9240576496674058, + "eval_f1": 0.9211219837609808, + "eval_loss": 0.7976791262626648, + "eval_precision": 0.9198585512225491, + "eval_recall": 0.9240576496674058, + "eval_runtime": 8.2334, + "eval_samples_per_second": 438.216, + "eval_steps_per_second": 3.522, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.6286497013368595e-05, + "loss": 0.6932, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9124711944741329, + "eval_loss": 0.8060343265533447, + "eval_precision": 0.9122416206835677, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.0112, + "eval_samples_per_second": 450.367, + "eval_steps_per_second": 3.62, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.2776045190274343e-05, + "loss": 0.6921, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 9.654684057699587e-06, + "loss": 0.6906, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9183619893760684, + "eval_loss": 0.7988142967224121, + "eval_precision": 0.9170085165468955, + "eval_recall": 0.9223946784922394, + "eval_runtime": 8.1129, + "eval_samples_per_second": 444.725, + "eval_steps_per_second": 3.575, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 6.942898416082735e-06, + "loss": 0.6901, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 4.658485112805636e-06, + "loss": 0.6867, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9158444370340527, + "eval_loss": 0.8033143877983093, + "eval_precision": 0.9141664578171705, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.0607, + "eval_samples_per_second": 447.604, + "eval_steps_per_second": 3.598, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 2.816436245433715e-06, + "loss": 0.6902, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9157575773094981, + "eval_loss": 0.804436981678009, + "eval_precision": 0.9150855753319124, + "eval_recall": 0.9204545454545454, + "eval_runtime": 7.8872, + "eval_samples_per_second": 457.448, + "eval_steps_per_second": 3.677, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.4288407719065336e-06, + "loss": 0.6909, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 5.048051733895535e-07, + "loss": 0.6896, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9215631929046563, + "eval_f1": 0.9175514425994765, + "eval_loss": 0.7991068959236145, + "eval_precision": 0.915743912400148, + "eval_recall": 0.9215631929046563, + "eval_runtime": 8.0166, + "eval_samples_per_second": 450.064, + "eval_steps_per_second": 3.617, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 5.039369045912769e-08, + "loss": 0.6917, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9142678261388635, + "eval_loss": 0.8052626848220825, + "eval_precision": 0.9133905682685922, + "eval_recall": 0.9193458980044346, + "eval_runtime": 7.774, + "eval_samples_per_second": 464.112, + "eval_steps_per_second": 3.73, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.1692906290585636, + "learning_rate": 0.0001441270857374405, + "metric": "eval/loss", + "weight_decay": 0.1625015259041905 + } +} diff --git a/run-wxq1fn3t/checkpoint-630/training_args.bin b/run-wxq1fn3t/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c3e20c512d4f7ae3f42bc09a52d1259706f3cf7f --- /dev/null +++ b/run-wxq1fn3t/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10c8ee0e17d198bb43e0df998003bbfd67aeea5ac3e4a4943e73095410cb61ca +size 4792 diff --git a/run-wzu0t9n4/checkpoint-552/model.safetensors b/run-wzu0t9n4/checkpoint-552/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c88bbf09a309947cff4e28d0b72ddabeeb6b6b02 --- /dev/null +++ b/run-wzu0t9n4/checkpoint-552/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2630866ca591a7e5c1c861a71242fa0d2f186ebe6c419a32b73a333d6f762c2 +size 198025308 diff --git a/run-wzu0t9n4/checkpoint-552/optimizer.pt b/run-wzu0t9n4/checkpoint-552/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4871ccf196cff3e36cfec17e431eff522233a7c4 --- /dev/null +++ b/run-wzu0t9n4/checkpoint-552/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62b93a46a8df3535284797832c3463bb95b036ea2b1cd79bec08822bbc998f19 +size 395900602 diff --git a/run-wzu0t9n4/checkpoint-552/rng_state.pth b/run-wzu0t9n4/checkpoint-552/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a07d02214d4d2a0bd650d84451df8b01ad9e2e1f --- /dev/null +++ b/run-wzu0t9n4/checkpoint-552/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ea0e7f2a9ffdc1f2f52e0b770bd1a5190fc0a00c767b73b57597c52b6f4dee6 +size 14244 diff --git a/run-wzu0t9n4/checkpoint-552/scheduler.pt b/run-wzu0t9n4/checkpoint-552/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..93db246cb925893f8d48d70ffa401676eae55566 --- /dev/null +++ b/run-wzu0t9n4/checkpoint-552/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:038a8d761ca54fe0818b9598c19dfc1c0e8a822c5d98dca5ec9902f6ffb6f4d8 +size 1064 diff --git a/run-wzu0t9n4/checkpoint-552/trainer_state.json b/run-wzu0t9n4/checkpoint-552/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c540f3e0f2710b2f3c845a5d21bbcc9c71b83239 --- /dev/null +++ b/run-wzu0t9n4/checkpoint-552/trainer_state.json @@ -0,0 +1,592 @@ +{ + "best_metric": 0.9182043640992674, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-wzu0t9n4/checkpoint-552", + "epoch": 25.976470588235294, + "eval_steps": 500, + "global_step": 552, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.749902708284362e-05, + "loss": 1.4648, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7508179552480237, + "eval_loss": 1.0071903467178345, + "eval_precision": 0.6864978143631778, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.2511, + "eval_samples_per_second": 437.273, + "eval_steps_per_second": 3.515, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.499805416568724e-05, + "loss": 1.102, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 5.249708124853087e-05, + "loss": 0.9199, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8727827050997783, + "eval_f1": 0.8477679707321143, + "eval_loss": 0.907704770565033, + "eval_precision": 0.8664476696917162, + "eval_recall": 0.8727827050997783, + "eval_runtime": 8.0027, + "eval_samples_per_second": 450.848, + "eval_steps_per_second": 3.624, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 6.999610833137448e-05, + "loss": 0.8517, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9060421286031042, + "eval_f1": 0.8980253353260195, + "eval_loss": 0.831325352191925, + "eval_precision": 0.8946657878043787, + "eval_recall": 0.9060421286031042, + "eval_runtime": 8.0564, + "eval_samples_per_second": 447.843, + "eval_steps_per_second": 3.6, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 8.749513541421812e-05, + "loss": 0.823, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00010499416249706174, + "loss": 0.7965, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9077050997782705, + "eval_f1": 0.9046650550576825, + "eval_loss": 0.8242368102073669, + "eval_precision": 0.9032023321116387, + "eval_recall": 0.9077050997782705, + "eval_runtime": 8.3043, + "eval_samples_per_second": 434.476, + "eval_steps_per_second": 3.492, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012249318957990534, + "loss": 0.791, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00013999221666274896, + "loss": 0.7788, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8819290465631929, + "eval_f1": 0.8826304155226078, + "eval_loss": 0.8459151983261108, + "eval_precision": 0.8905812152415564, + "eval_recall": 0.8819290465631929, + "eval_runtime": 8.0728, + "eval_samples_per_second": 446.93, + "eval_steps_per_second": 3.592, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0001574912437455926, + "loss": 0.765, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9074805320323697, + "eval_loss": 0.8087025284767151, + "eval_precision": 0.9086086320350719, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.3123, + "eval_samples_per_second": 434.057, + "eval_steps_per_second": 3.489, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00016957959651118514, + "loss": 0.7669, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00016913027885184472, + "loss": 0.7605, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9038443106414301, + "eval_loss": 0.8290393948554993, + "eval_precision": 0.9033773674296167, + "eval_recall": 0.9099223946784922, + "eval_runtime": 7.639, + "eval_samples_per_second": 472.313, + "eval_steps_per_second": 3.796, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00016812753982652046, + "loss": 0.7558, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00016657796018891338, + "loss": 0.7508, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8580931263858093, + "eval_f1": 0.8670828666352879, + "eval_loss": 0.8918585777282715, + "eval_precision": 0.889348209841759, + "eval_recall": 0.8580931263858093, + "eval_runtime": 8.4599, + "eval_samples_per_second": 426.484, + "eval_steps_per_second": 3.428, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0001644917094863112, + "loss": 0.7447, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.891629711751663, + "eval_f1": 0.870721000914541, + "eval_loss": 0.8580062389373779, + "eval_precision": 0.8847617894852993, + "eval_recall": 0.891629711751663, + "eval_runtime": 8.0662, + "eval_samples_per_second": 447.298, + "eval_steps_per_second": 3.595, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00016188247931910626, + "loss": 0.7409, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00015876739348586038, + "loss": 0.7349, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8517184035476718, + "eval_f1": 0.8630638507115688, + "eval_loss": 0.8996167778968811, + "eval_precision": 0.8884498450738221, + "eval_recall": 0.8517184035476718, + "eval_runtime": 8.0983, + "eval_samples_per_second": 445.528, + "eval_steps_per_second": 3.581, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00015516689560361502, + "loss": 0.726, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8899667405764967, + "eval_f1": 0.892937152740948, + "eval_loss": 0.8514795303344727, + "eval_precision": 0.9046798158531489, + "eval_recall": 0.8899667405764967, + "eval_runtime": 7.766, + "eval_samples_per_second": 464.588, + "eval_steps_per_second": 3.734, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00015110461494096937, + "loss": 0.7262, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00014660721134443416, + "loss": 0.7199, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9070159567246998, + "eval_loss": 0.8142106533050537, + "eval_precision": 0.90477631497617, + "eval_recall": 0.9129711751662971, + "eval_runtime": 7.6408, + "eval_samples_per_second": 472.201, + "eval_steps_per_second": 3.795, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0001417042002757752, + "loss": 0.7218, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00013642775910858787, + "loss": 0.7151, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9103028644343328, + "eval_loss": 0.8058192729949951, + "eval_precision": 0.9070720065079413, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.1494, + "eval_samples_per_second": 442.73, + "eval_steps_per_second": 3.559, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0001308125159553345, + "loss": 0.7152, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.9013298349197362, + "eval_loss": 0.8139892220497131, + "eval_precision": 0.9004110235379639, + "eval_recall": 0.9093680709534369, + "eval_runtime": 8.2547, + "eval_samples_per_second": 437.083, + "eval_steps_per_second": 3.513, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0001248953224107272, + "loss": 0.7112, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00011871501170288919, + "loss": 0.711, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8819290465631929, + "eval_f1": 0.8864016010986537, + "eval_loss": 0.8521913886070251, + "eval_precision": 0.8990394098045063, + "eval_recall": 0.8819290465631929, + "eval_runtime": 8.442, + "eval_samples_per_second": 427.388, + "eval_steps_per_second": 3.435, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00011231214383949673, + "loss": 0.7107, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00010572873942144917, + "loss": 0.7111, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9106722436787885, + "eval_loss": 0.8081309199333191, + "eval_precision": 0.9105537156932374, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.5961, + "eval_samples_per_second": 419.725, + "eval_steps_per_second": 3.374, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 9.900800387099059e-05, + "loss": 0.7035, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9099606211584155, + "eval_loss": 0.8164511919021606, + "eval_precision": 0.9103425479496782, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.3607, + "eval_samples_per_second": 431.541, + "eval_steps_per_second": 3.469, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 9.219404388411121e-05, + "loss": 0.7051, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 8.533157796808965e-05, + "loss": 0.7005, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9149931305100083, + "eval_loss": 0.8057773113250732, + "eval_precision": 0.9136297883836986, + "eval_recall": 0.9176829268292683, + "eval_runtime": 8.2722, + "eval_samples_per_second": 436.162, + "eval_steps_per_second": 3.506, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 7.846564296385345e-05, + "loss": 0.7012, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 7.164129847918612e-05, + "loss": 0.6975, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9029933481152993, + "eval_f1": 0.9045218579711964, + "eval_loss": 0.8318495154380798, + "eval_precision": 0.9099746342129503, + "eval_recall": 0.9029933481152993, + "eval_runtime": 8.1399, + "eval_samples_per_second": 443.247, + "eval_steps_per_second": 3.563, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 6.490333117252092e-05, + "loss": 0.7033, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9070761690471957, + "eval_loss": 0.8104382753372192, + "eval_precision": 0.9040788632137852, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.2261, + "eval_samples_per_second": 438.602, + "eval_steps_per_second": 3.525, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 5.829596082804002e-05, + "loss": 0.6955, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 5.186255015104426e-05, + "loss": 0.6933, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.905713920676339, + "eval_loss": 0.8186340928077698, + "eval_precision": 0.9046215751339501, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.1912, + "eval_samples_per_second": 440.472, + "eval_steps_per_second": 3.54, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 4.564532018814091e-05, + "loss": 0.6965, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9114686263626974, + "eval_loss": 0.8115004301071167, + "eval_precision": 0.9102352427155284, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.2759, + "eval_samples_per_second": 435.962, + "eval_steps_per_second": 3.504, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 3.968507323988414e-05, + "loss": 0.6937, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 3.4020925084331305e-05, + "loss": 0.6907, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9123402535947018, + "eval_loss": 0.8141847848892212, + "eval_precision": 0.9101482277775821, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.8511, + "eval_samples_per_second": 459.551, + "eval_steps_per_second": 3.694, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.8690048268871798e-05, + "loss": 0.6935, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 2.3727428155047567e-05, + "loss": 0.6912, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9165593006457079, + "eval_loss": 0.8030330538749695, + "eval_precision": 0.9152832938304444, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.1974, + "eval_samples_per_second": 440.142, + "eval_steps_per_second": 3.538, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.9165633317389254e-05, + "loss": 0.6925, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9126989221907172, + "eval_loss": 0.8106822371482849, + "eval_precision": 0.9114571220700429, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.21, + "eval_samples_per_second": 439.467, + "eval_steps_per_second": 3.532, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.5034601803088852e-05, + "loss": 0.6883, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.1361444655239707e-05, + "loss": 0.6892, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9229490022172949, + "eval_f1": 0.9182043640992674, + "eval_loss": 0.7995796203613281, + "eval_precision": 0.9174042023103985, + "eval_recall": 0.9229490022172949, + "eval_runtime": 8.0656, + "eval_samples_per_second": 447.33, + "eval_steps_per_second": 3.596, + "step": 552 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.037072725913030305, + "learning_rate": 0.00016960595480294588, + "metric": "eval/loss", + "weight_decay": 0.1893111958073303 + } +} diff --git a/run-wzu0t9n4/checkpoint-552/training_args.bin b/run-wzu0t9n4/checkpoint-552/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fdbbd7434b8d1fd3459a33897d022b5c77485171 --- /dev/null +++ b/run-wzu0t9n4/checkpoint-552/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b412fd7f26b2ba6046858b14cc1b9a19c1795e7e39ef0d3c8380b0e2b413415d +size 4792 diff --git a/run-wzu0t9n4/checkpoint-630/model.safetensors b/run-wzu0t9n4/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5798972552182e162c89a200b1e719445882f68 --- /dev/null +++ b/run-wzu0t9n4/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:060e7fe382d8efedc22b04226b22886592109b55cd5b5fbf8b3a0a92d4ce7b78 +size 198025308 diff --git a/run-wzu0t9n4/checkpoint-630/optimizer.pt b/run-wzu0t9n4/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c19f19c79555d87277d1590ca64a3c81a8178528 --- /dev/null +++ b/run-wzu0t9n4/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dda431364b24cd94cb7e04918cebd1fde146318b8a0b9d5ea02fd40399a006b +size 395900602 diff --git a/run-wzu0t9n4/checkpoint-630/rng_state.pth b/run-wzu0t9n4/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-wzu0t9n4/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-wzu0t9n4/checkpoint-630/scheduler.pt b/run-wzu0t9n4/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ee4a6f31f5a410e9ad5fc6d89665b7cf9fc1e1c --- /dev/null +++ b/run-wzu0t9n4/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:060309880a4fec3285c30e839e920d7657c5c0c8d6e500fd9a0bfc66aac0ddc0 +size 1064 diff --git a/run-wzu0t9n4/checkpoint-630/trainer_state.json b/run-wzu0t9n4/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9659c157ae663c20fa516b9c6c6b417f86d07a4f --- /dev/null +++ b/run-wzu0t9n4/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9182043640992674, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-wzu0t9n4/checkpoint-552", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.749902708284362e-05, + "loss": 1.4648, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7508179552480237, + "eval_loss": 1.0071903467178345, + "eval_precision": 0.6864978143631778, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.2511, + "eval_samples_per_second": 437.273, + "eval_steps_per_second": 3.515, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.499805416568724e-05, + "loss": 1.102, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 5.249708124853087e-05, + "loss": 0.9199, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8727827050997783, + "eval_f1": 0.8477679707321143, + "eval_loss": 0.907704770565033, + "eval_precision": 0.8664476696917162, + "eval_recall": 0.8727827050997783, + "eval_runtime": 8.0027, + "eval_samples_per_second": 450.848, + "eval_steps_per_second": 3.624, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 6.999610833137448e-05, + "loss": 0.8517, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9060421286031042, + "eval_f1": 0.8980253353260195, + "eval_loss": 0.831325352191925, + "eval_precision": 0.8946657878043787, + "eval_recall": 0.9060421286031042, + "eval_runtime": 8.0564, + "eval_samples_per_second": 447.843, + "eval_steps_per_second": 3.6, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 8.749513541421812e-05, + "loss": 0.823, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00010499416249706174, + "loss": 0.7965, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9077050997782705, + "eval_f1": 0.9046650550576825, + "eval_loss": 0.8242368102073669, + "eval_precision": 0.9032023321116387, + "eval_recall": 0.9077050997782705, + "eval_runtime": 8.3043, + "eval_samples_per_second": 434.476, + "eval_steps_per_second": 3.492, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012249318957990534, + "loss": 0.791, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00013999221666274896, + "loss": 0.7788, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8819290465631929, + "eval_f1": 0.8826304155226078, + "eval_loss": 0.8459151983261108, + "eval_precision": 0.8905812152415564, + "eval_recall": 0.8819290465631929, + "eval_runtime": 8.0728, + "eval_samples_per_second": 446.93, + "eval_steps_per_second": 3.592, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0001574912437455926, + "loss": 0.765, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9074805320323697, + "eval_loss": 0.8087025284767151, + "eval_precision": 0.9086086320350719, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.3123, + "eval_samples_per_second": 434.057, + "eval_steps_per_second": 3.489, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.00016957959651118514, + "loss": 0.7669, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00016913027885184472, + "loss": 0.7605, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9038443106414301, + "eval_loss": 0.8290393948554993, + "eval_precision": 0.9033773674296167, + "eval_recall": 0.9099223946784922, + "eval_runtime": 7.639, + "eval_samples_per_second": 472.313, + "eval_steps_per_second": 3.796, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00016812753982652046, + "loss": 0.7558, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.00016657796018891338, + "loss": 0.7508, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8580931263858093, + "eval_f1": 0.8670828666352879, + "eval_loss": 0.8918585777282715, + "eval_precision": 0.889348209841759, + "eval_recall": 0.8580931263858093, + "eval_runtime": 8.4599, + "eval_samples_per_second": 426.484, + "eval_steps_per_second": 3.428, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.0001644917094863112, + "loss": 0.7447, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.891629711751663, + "eval_f1": 0.870721000914541, + "eval_loss": 0.8580062389373779, + "eval_precision": 0.8847617894852993, + "eval_recall": 0.891629711751663, + "eval_runtime": 8.0662, + "eval_samples_per_second": 447.298, + "eval_steps_per_second": 3.595, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00016188247931910626, + "loss": 0.7409, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00015876739348586038, + "loss": 0.7349, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.8517184035476718, + "eval_f1": 0.8630638507115688, + "eval_loss": 0.8996167778968811, + "eval_precision": 0.8884498450738221, + "eval_recall": 0.8517184035476718, + "eval_runtime": 8.0983, + "eval_samples_per_second": 445.528, + "eval_steps_per_second": 3.581, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00015516689560361502, + "loss": 0.726, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8899667405764967, + "eval_f1": 0.892937152740948, + "eval_loss": 0.8514795303344727, + "eval_precision": 0.9046798158531489, + "eval_recall": 0.8899667405764967, + "eval_runtime": 7.766, + "eval_samples_per_second": 464.588, + "eval_steps_per_second": 3.734, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00015110461494096937, + "loss": 0.7262, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00014660721134443416, + "loss": 0.7199, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9070159567246998, + "eval_loss": 0.8142106533050537, + "eval_precision": 0.90477631497617, + "eval_recall": 0.9129711751662971, + "eval_runtime": 7.6408, + "eval_samples_per_second": 472.201, + "eval_steps_per_second": 3.795, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.0001417042002757752, + "loss": 0.7218, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00013642775910858787, + "loss": 0.7151, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.9103028644343328, + "eval_loss": 0.8058192729949951, + "eval_precision": 0.9070720065079413, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.1494, + "eval_samples_per_second": 442.73, + "eval_steps_per_second": 3.559, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0001308125159553345, + "loss": 0.7152, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9093680709534369, + "eval_f1": 0.9013298349197362, + "eval_loss": 0.8139892220497131, + "eval_precision": 0.9004110235379639, + "eval_recall": 0.9093680709534369, + "eval_runtime": 8.2547, + "eval_samples_per_second": 437.083, + "eval_steps_per_second": 3.513, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.0001248953224107272, + "loss": 0.7112, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00011871501170288919, + "loss": 0.711, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.8819290465631929, + "eval_f1": 0.8864016010986537, + "eval_loss": 0.8521913886070251, + "eval_precision": 0.8990394098045063, + "eval_recall": 0.8819290465631929, + "eval_runtime": 8.442, + "eval_samples_per_second": 427.388, + "eval_steps_per_second": 3.435, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00011231214383949673, + "loss": 0.7107, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00010572873942144917, + "loss": 0.7111, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9193458980044346, + "eval_f1": 0.9106722436787885, + "eval_loss": 0.8081309199333191, + "eval_precision": 0.9105537156932374, + "eval_recall": 0.9193458980044346, + "eval_runtime": 8.5961, + "eval_samples_per_second": 419.725, + "eval_steps_per_second": 3.374, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 9.900800387099059e-05, + "loss": 0.7035, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9099606211584155, + "eval_loss": 0.8164511919021606, + "eval_precision": 0.9103425479496782, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.3607, + "eval_samples_per_second": 431.541, + "eval_steps_per_second": 3.469, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 9.219404388411121e-05, + "loss": 0.7051, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 8.533157796808965e-05, + "loss": 0.7005, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9176829268292683, + "eval_f1": 0.9149931305100083, + "eval_loss": 0.8057773113250732, + "eval_precision": 0.9136297883836986, + "eval_recall": 0.9176829268292683, + "eval_runtime": 8.2722, + "eval_samples_per_second": 436.162, + "eval_steps_per_second": 3.506, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 7.846564296385345e-05, + "loss": 0.7012, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 7.164129847918612e-05, + "loss": 0.6975, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9029933481152993, + "eval_f1": 0.9045218579711964, + "eval_loss": 0.8318495154380798, + "eval_precision": 0.9099746342129503, + "eval_recall": 0.9029933481152993, + "eval_runtime": 8.1399, + "eval_samples_per_second": 443.247, + "eval_steps_per_second": 3.563, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 6.490333117252092e-05, + "loss": 0.7033, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9129711751662971, + "eval_f1": 0.9070761690471957, + "eval_loss": 0.8104382753372192, + "eval_precision": 0.9040788632137852, + "eval_recall": 0.9129711751662971, + "eval_runtime": 8.2261, + "eval_samples_per_second": 438.602, + "eval_steps_per_second": 3.525, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 5.829596082804002e-05, + "loss": 0.6955, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 5.186255015104426e-05, + "loss": 0.6933, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.905713920676339, + "eval_loss": 0.8186340928077698, + "eval_precision": 0.9046215751339501, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.1912, + "eval_samples_per_second": 440.472, + "eval_steps_per_second": 3.54, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 4.564532018814091e-05, + "loss": 0.6965, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9114686263626974, + "eval_loss": 0.8115004301071167, + "eval_precision": 0.9102352427155284, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.2759, + "eval_samples_per_second": 435.962, + "eval_steps_per_second": 3.504, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 3.968507323988414e-05, + "loss": 0.6937, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 3.4020925084331305e-05, + "loss": 0.6907, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9168514412416852, + "eval_f1": 0.9123402535947018, + "eval_loss": 0.8141847848892212, + "eval_precision": 0.9101482277775821, + "eval_recall": 0.9168514412416852, + "eval_runtime": 7.8511, + "eval_samples_per_second": 459.551, + "eval_steps_per_second": 3.694, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.8690048268871798e-05, + "loss": 0.6935, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 2.3727428155047567e-05, + "loss": 0.6912, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9165593006457079, + "eval_loss": 0.8030330538749695, + "eval_precision": 0.9152832938304444, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.1974, + "eval_samples_per_second": 440.142, + "eval_steps_per_second": 3.538, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.9165633317389254e-05, + "loss": 0.6925, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9182372505543237, + "eval_f1": 0.9126989221907172, + "eval_loss": 0.8106822371482849, + "eval_precision": 0.9114571220700429, + "eval_recall": 0.9182372505543237, + "eval_runtime": 8.21, + "eval_samples_per_second": 439.467, + "eval_steps_per_second": 3.532, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.5034601803088852e-05, + "loss": 0.6883, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.1361444655239707e-05, + "loss": 0.6892, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.9229490022172949, + "eval_f1": 0.9182043640992674, + "eval_loss": 0.7995796203613281, + "eval_precision": 0.9174042023103985, + "eval_recall": 0.9229490022172949, + "eval_runtime": 8.0656, + "eval_samples_per_second": 447.33, + "eval_steps_per_second": 3.596, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 8.170267989077045e-06, + "loss": 0.6873, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 5.482014788893074e-06, + "loss": 0.6866, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.91669499330135, + "eval_loss": 0.8054488897323608, + "eval_precision": 0.9143933071148412, + "eval_recall": 0.9207317073170732, + "eval_runtime": 8.3349, + "eval_samples_per_second": 432.879, + "eval_steps_per_second": 3.479, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 3.3143274638787704e-06, + "loss": 0.6881, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.9136223093292011, + "eval_loss": 0.8099495768547058, + "eval_precision": 0.9132806494238594, + "eval_recall": 0.917960088691796, + "eval_runtime": 8.0983, + "eval_samples_per_second": 445.527, + "eval_steps_per_second": 3.581, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.681432064907368e-06, + "loss": 0.6897, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 5.94044922119455e-07, + "loss": 0.6884, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9207317073170732, + "eval_f1": 0.9164414181965655, + "eval_loss": 0.7998366355895996, + "eval_precision": 0.9134990411107916, + "eval_recall": 0.9207317073170732, + "eval_runtime": 8.2049, + "eval_samples_per_second": 439.738, + "eval_steps_per_second": 3.534, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 5.9302316026391064e-08, + "loss": 0.6913, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9156300552880473, + "eval_loss": 0.807349443435669, + "eval_precision": 0.9147287384160769, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.3058, + "eval_samples_per_second": 434.394, + "eval_steps_per_second": 3.492, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.037072725913030305, + "learning_rate": 0.00016960595480294588, + "metric": "eval/loss", + "weight_decay": 0.1893111958073303 + } +} diff --git a/run-wzu0t9n4/checkpoint-630/training_args.bin b/run-wzu0t9n4/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fdbbd7434b8d1fd3459a33897d022b5c77485171 --- /dev/null +++ b/run-wzu0t9n4/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b412fd7f26b2ba6046858b14cc1b9a19c1795e7e39ef0d3c8380b0e2b413415d +size 4792 diff --git a/run-xls0dm9g/checkpoint-1190/model.safetensors b/run-xls0dm9g/checkpoint-1190/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37647578ddc7ba918b6c743157cc2cf412600e54 --- /dev/null +++ b/run-xls0dm9g/checkpoint-1190/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd86b5e7d336923440f77a5d4bdc210e04826ba2c433bcead005fff7685e023f +size 198025308 diff --git a/run-xls0dm9g/checkpoint-1190/optimizer.pt b/run-xls0dm9g/checkpoint-1190/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..23d3677ea4c43e31bb562a2fb6016a4a85304ef4 --- /dev/null +++ b/run-xls0dm9g/checkpoint-1190/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13413dec99acad8f2cc29c7cb6007903138d99a45d167ec2c9b01e4b3b46a97f +size 395900602 diff --git a/run-xls0dm9g/checkpoint-1190/rng_state.pth b/run-xls0dm9g/checkpoint-1190/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fa10329c52a02715f873c9a50812feb7d32c8cd3 --- /dev/null +++ b/run-xls0dm9g/checkpoint-1190/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43f5febab37757cc5268c77056c937c9c526090d892464a785cf2004d48e5d85 +size 14244 diff --git a/run-xls0dm9g/checkpoint-1190/scheduler.pt b/run-xls0dm9g/checkpoint-1190/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..13bc60dc5889924f17c2e2068f06b31c4203f752 --- /dev/null +++ b/run-xls0dm9g/checkpoint-1190/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d26eee25a0ff98f5a456550d26bb938e906b8ea2e0f737ea49585f63b3ed3d1 +size 1064 diff --git a/run-xls0dm9g/checkpoint-1190/trainer_state.json b/run-xls0dm9g/checkpoint-1190/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0e3c94b4e5d4459bc22961ff6b3c4a0f02f2b8dc --- /dev/null +++ b/run-xls0dm9g/checkpoint-1190/trainer_state.json @@ -0,0 +1,549 @@ +{ + "best_metric": 0.9251662971175166, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-xls0dm9g/checkpoint-1190", + "epoch": 28.0, + "eval_steps": 500, + "global_step": 1190, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.0884905914174244e-05, + "loss": 1.4456, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_loss": 0.9807015061378479, + "eval_runtime": 6.9468, + "eval_samples_per_second": 519.375, + "eval_steps_per_second": 8.205, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 2.1769811828348488e-05, + "loss": 1.0522, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 3.265471774252273e-05, + "loss": 0.9074, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8927383592017738, + "eval_loss": 0.8695214986801147, + "eval_runtime": 7.0359, + "eval_samples_per_second": 512.802, + "eval_steps_per_second": 8.101, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 4.3539623656696975e-05, + "loss": 0.8429, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8102517127990723, + "eval_runtime": 7.0411, + "eval_samples_per_second": 512.417, + "eval_steps_per_second": 8.095, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 5.442452957087122e-05, + "loss": 0.8116, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 6.530943548504547e-05, + "loss": 0.797, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.8051577806472778, + "eval_runtime": 6.8747, + "eval_samples_per_second": 524.821, + "eval_steps_per_second": 8.291, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 7.619434139921971e-05, + "loss": 0.7856, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 8.707924731339395e-05, + "loss": 0.7825, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.7985389232635498, + "eval_runtime": 6.8786, + "eval_samples_per_second": 524.525, + "eval_steps_per_second": 8.287, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 9.796415322756819e-05, + "loss": 0.7768, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9104767184035477, + "eval_loss": 0.813129186630249, + "eval_runtime": 6.7672, + "eval_samples_per_second": 533.159, + "eval_steps_per_second": 8.423, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00010293952330374773, + "loss": 0.769, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00010259303120171935, + "loss": 0.7597, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8035091757774353, + "eval_runtime": 6.7614, + "eval_samples_per_second": 533.615, + "eval_steps_per_second": 8.43, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00010191514213121855, + "loss": 0.7627, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.000100910252462995, + "loss": 0.7494, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8094068169593811, + "eval_runtime": 6.9018, + "eval_samples_per_second": 522.764, + "eval_steps_per_second": 8.259, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 9.958487929215501e-05, + "loss": 0.7392, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8033455610275269, + "eval_runtime": 6.9606, + "eval_samples_per_second": 518.347, + "eval_steps_per_second": 8.189, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 9.794761817229904e-05, + "loss": 0.7475, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 9.600908737005814e-05, + "loss": 0.7369, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.8000779151916504, + "eval_runtime": 6.9887, + "eval_samples_per_second": 516.264, + "eval_steps_per_second": 8.156, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 9.378185900156031e-05, + "loss": 0.7288, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8052855134010315, + "eval_runtime": 6.95, + "eval_samples_per_second": 519.14, + "eval_steps_per_second": 8.201, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 9.128037749743317e-05, + "loss": 0.7262, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 8.85208659251284e-05, + "loss": 0.7243, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.8093820810317993, + "eval_runtime": 6.9812, + "eval_samples_per_second": 516.814, + "eval_steps_per_second": 8.165, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 8.55212207761041e-05, + "loss": 0.7223, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 8.230089590021034e-05, + "loss": 0.7239, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.7984662652015686, + "eval_runtime": 6.9083, + "eval_samples_per_second": 522.269, + "eval_steps_per_second": 8.251, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 7.888077634000758e-05, + "loss": 0.7174, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9068736141906873, + "eval_loss": 0.8118646740913391, + "eval_runtime": 6.7002, + "eval_samples_per_second": 538.493, + "eval_steps_per_second": 8.507, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 7.528304288325165e-05, + "loss": 0.7219, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 7.153102821197356e-05, + "loss": 0.7091, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9082594235033259, + "eval_loss": 0.8183057308197021, + "eval_runtime": 6.7274, + "eval_samples_per_second": 536.318, + "eval_steps_per_second": 8.473, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 6.764906558108306e-05, + "loss": 0.7148, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 6.36623310078731e-05, + "loss": 0.7085, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8175063729286194, + "eval_runtime": 6.6657, + "eval_samples_per_second": 541.279, + "eval_steps_per_second": 8.551, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 5.959667999588694e-05, + "loss": 0.7095, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.8066895008087158, + "eval_runtime": 6.6344, + "eval_samples_per_second": 543.834, + "eval_steps_per_second": 8.592, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 5.547847985205599e-05, + "loss": 0.705, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 5.133443868459643e-05, + "loss": 0.706, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8048518896102905, + "eval_runtime": 6.7061, + "eval_samples_per_second": 538.018, + "eval_steps_per_second": 8.5, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 4.7191432190678695e-05, + "loss": 0.7011, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 4.3076329357218296e-05, + "loss": 0.704, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9016075388026608, + "eval_loss": 0.8273409605026245, + "eval_runtime": 6.7627, + "eval_samples_per_second": 533.516, + "eval_steps_per_second": 8.429, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 3.9015818205185135e-05, + "loss": 0.7064, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9226718403547672, + "eval_loss": 0.7918920516967773, + "eval_runtime": 6.7696, + "eval_samples_per_second": 532.972, + "eval_steps_per_second": 8.42, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 3.503623270754648e-05, + "loss": 0.7001, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 3.116338200334715e-05, + "loss": 0.6966, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8047279715538025, + "eval_runtime": 7.0973, + "eval_samples_per_second": 508.363, + "eval_steps_per_second": 8.031, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 2.7422383015539016e-05, + "loss": 0.6946, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.8015788793563843, + "eval_runtime": 6.9481, + "eval_samples_per_second": 519.278, + "eval_steps_per_second": 8.204, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 2.3837497558097454e-05, + "loss": 0.6989, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 2.043197498884775e-05, + "loss": 0.6969, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9243348115299335, + "eval_loss": 0.8005701303482056, + "eval_runtime": 7.0372, + "eval_samples_per_second": 512.701, + "eval_steps_per_second": 8.1, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 1.722790142845804e-05, + "loss": 0.6964, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 1.424605652347164e-05, + "loss": 0.6947, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9232261640798226, + "eval_loss": 0.7947578430175781, + "eval_runtime": 6.7748, + "eval_samples_per_second": 532.563, + "eval_steps_per_second": 8.414, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.1505778682325462e-05, + "loss": 0.6917, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.8022744655609131, + "eval_runtime": 6.5556, + "eval_samples_per_second": 550.368, + "eval_steps_per_second": 8.695, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 9.02483965835048e-06, + "loss": 0.6916, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 6.819329293131826e-06, + "loss": 0.6901, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.8050836324691772, + "eval_runtime": 7.2761, + "eval_samples_per_second": 495.87, + "eval_steps_per_second": 7.834, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 4.9035511677119265e-06, + "loss": 0.6908, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.2899298383786797e-06, + "loss": 0.6954, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9226718403547672, + "eval_loss": 0.795009434223175, + "eval_runtime": 6.5383, + "eval_samples_per_second": 551.824, + "eval_steps_per_second": 8.718, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.988930258650284e-06, + "loss": 0.6933, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9251662971175166, + "eval_loss": 0.792511522769928, + "eval_runtime": 6.6527, + "eval_samples_per_second": 542.336, + "eval_steps_per_second": 8.568, + "step": 1190 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00010298795595718708, + "metric": "eval/loss", + "warmup_ratio": 0.1950587360680428 + } +} diff --git a/run-xls0dm9g/checkpoint-1190/training_args.bin b/run-xls0dm9g/checkpoint-1190/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..82301c9280f896439c25119663356f6681b55be5 --- /dev/null +++ b/run-xls0dm9g/checkpoint-1190/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6052f31baea3ec00afec5c05aab4ef550b66f9c051f17af532dad7afbace75fe +size 4792 diff --git a/run-xls0dm9g/checkpoint-1260/model.safetensors b/run-xls0dm9g/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04a07588593e8ea95debd41d83385fd61cc68c37 --- /dev/null +++ b/run-xls0dm9g/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79839d692ce360029e8b82d9be4936d21eec195912670ca8aef348b1108e1955 +size 198025308 diff --git a/run-xls0dm9g/checkpoint-1260/optimizer.pt b/run-xls0dm9g/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ab226a4f502ef5909beee21d736d3fa1040a72d --- /dev/null +++ b/run-xls0dm9g/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8a5a55b7a19ba0d95e65bcb28acbd78baa982eea9e5a50398ce3e6c629d5b7c +size 395900602 diff --git a/run-xls0dm9g/checkpoint-1260/rng_state.pth b/run-xls0dm9g/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-xls0dm9g/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-xls0dm9g/checkpoint-1260/scheduler.pt b/run-xls0dm9g/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..85d77a873a1de513864e562bdf8e667e59279650 --- /dev/null +++ b/run-xls0dm9g/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7174953088149ca9e1d4be705f1250f0e642f7241e3987324dc585371670e798 +size 1064 diff --git a/run-xls0dm9g/checkpoint-1260/trainer_state.json b/run-xls0dm9g/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f3f5c5f4c4f735d87fabf3ecdd75527bfb72ed91 --- /dev/null +++ b/run-xls0dm9g/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9251662971175166, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-xls0dm9g/checkpoint-1190", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.0884905914174244e-05, + "loss": 1.4456, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_loss": 0.9807015061378479, + "eval_runtime": 6.9468, + "eval_samples_per_second": 519.375, + "eval_steps_per_second": 8.205, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 2.1769811828348488e-05, + "loss": 1.0522, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 3.265471774252273e-05, + "loss": 0.9074, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8927383592017738, + "eval_loss": 0.8695214986801147, + "eval_runtime": 7.0359, + "eval_samples_per_second": 512.802, + "eval_steps_per_second": 8.101, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 4.3539623656696975e-05, + "loss": 0.8429, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9132483370288248, + "eval_loss": 0.8102517127990723, + "eval_runtime": 7.0411, + "eval_samples_per_second": 512.417, + "eval_steps_per_second": 8.095, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 5.442452957087122e-05, + "loss": 0.8116, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 6.530943548504547e-05, + "loss": 0.797, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.8051577806472778, + "eval_runtime": 6.8747, + "eval_samples_per_second": 524.821, + "eval_steps_per_second": 8.291, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 7.619434139921971e-05, + "loss": 0.7856, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 8.707924731339395e-05, + "loss": 0.7825, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.7985389232635498, + "eval_runtime": 6.8786, + "eval_samples_per_second": 524.525, + "eval_steps_per_second": 8.287, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 9.796415322756819e-05, + "loss": 0.7768, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.9104767184035477, + "eval_loss": 0.813129186630249, + "eval_runtime": 6.7672, + "eval_samples_per_second": 533.159, + "eval_steps_per_second": 8.423, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00010293952330374773, + "loss": 0.769, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.00010259303120171935, + "loss": 0.7597, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.8035091757774353, + "eval_runtime": 6.7614, + "eval_samples_per_second": 533.615, + "eval_steps_per_second": 8.43, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.00010191514213121855, + "loss": 0.7627, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.000100910252462995, + "loss": 0.7494, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9090909090909091, + "eval_loss": 0.8094068169593811, + "eval_runtime": 6.9018, + "eval_samples_per_second": 522.764, + "eval_steps_per_second": 8.259, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 9.958487929215501e-05, + "loss": 0.7392, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.9168514412416852, + "eval_loss": 0.8033455610275269, + "eval_runtime": 6.9606, + "eval_samples_per_second": 518.347, + "eval_steps_per_second": 8.189, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 9.794761817229904e-05, + "loss": 0.7475, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 9.600908737005814e-05, + "loss": 0.7369, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.91990022172949, + "eval_loss": 0.8000779151916504, + "eval_runtime": 6.9887, + "eval_samples_per_second": 516.264, + "eval_steps_per_second": 8.156, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 9.378185900156031e-05, + "loss": 0.7288, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.9165742793791575, + "eval_loss": 0.8052855134010315, + "eval_runtime": 6.95, + "eval_samples_per_second": 519.14, + "eval_steps_per_second": 8.201, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 9.128037749743317e-05, + "loss": 0.7262, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 8.85208659251284e-05, + "loss": 0.7243, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.8093820810317993, + "eval_runtime": 6.9812, + "eval_samples_per_second": 516.814, + "eval_steps_per_second": 8.165, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 8.55212207761041e-05, + "loss": 0.7223, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 8.230089590021034e-05, + "loss": 0.7239, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.9185144124168514, + "eval_loss": 0.7984662652015686, + "eval_runtime": 6.9083, + "eval_samples_per_second": 522.269, + "eval_steps_per_second": 8.251, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 7.888077634000758e-05, + "loss": 0.7174, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9068736141906873, + "eval_loss": 0.8118646740913391, + "eval_runtime": 6.7002, + "eval_samples_per_second": 538.493, + "eval_steps_per_second": 8.507, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 7.528304288325165e-05, + "loss": 0.7219, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 7.153102821197356e-05, + "loss": 0.7091, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9082594235033259, + "eval_loss": 0.8183057308197021, + "eval_runtime": 6.7274, + "eval_samples_per_second": 536.318, + "eval_steps_per_second": 8.473, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 6.764906558108306e-05, + "loss": 0.7148, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 6.36623310078731e-05, + "loss": 0.7085, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.907150776053215, + "eval_loss": 0.8175063729286194, + "eval_runtime": 6.6657, + "eval_samples_per_second": 541.279, + "eval_steps_per_second": 8.551, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 5.959667999588694e-05, + "loss": 0.7095, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.8066895008087158, + "eval_runtime": 6.6344, + "eval_samples_per_second": 543.834, + "eval_steps_per_second": 8.592, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 5.547847985205599e-05, + "loss": 0.705, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 5.133443868459643e-05, + "loss": 0.706, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8048518896102905, + "eval_runtime": 6.7061, + "eval_samples_per_second": 538.018, + "eval_steps_per_second": 8.5, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 4.7191432190678695e-05, + "loss": 0.7011, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 4.3076329357218296e-05, + "loss": 0.704, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9016075388026608, + "eval_loss": 0.8273409605026245, + "eval_runtime": 6.7627, + "eval_samples_per_second": 533.516, + "eval_steps_per_second": 8.429, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 3.9015818205185135e-05, + "loss": 0.7064, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9226718403547672, + "eval_loss": 0.7918920516967773, + "eval_runtime": 6.7696, + "eval_samples_per_second": 532.972, + "eval_steps_per_second": 8.42, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 3.503623270754648e-05, + "loss": 0.7001, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 3.116338200334715e-05, + "loss": 0.6966, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.8047279715538025, + "eval_runtime": 7.0973, + "eval_samples_per_second": 508.363, + "eval_steps_per_second": 8.031, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 2.7422383015539016e-05, + "loss": 0.6946, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.8015788793563843, + "eval_runtime": 6.9481, + "eval_samples_per_second": 519.278, + "eval_steps_per_second": 8.204, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 2.3837497558097454e-05, + "loss": 0.6989, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 2.043197498884775e-05, + "loss": 0.6969, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9243348115299335, + "eval_loss": 0.8005701303482056, + "eval_runtime": 7.0372, + "eval_samples_per_second": 512.701, + "eval_steps_per_second": 8.1, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 1.722790142845804e-05, + "loss": 0.6964, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 1.424605652347164e-05, + "loss": 0.6947, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9232261640798226, + "eval_loss": 0.7947578430175781, + "eval_runtime": 6.7748, + "eval_samples_per_second": 532.563, + "eval_steps_per_second": 8.414, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.1505778682325462e-05, + "loss": 0.6917, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.8022744655609131, + "eval_runtime": 6.5556, + "eval_samples_per_second": 550.368, + "eval_steps_per_second": 8.695, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 9.02483965835048e-06, + "loss": 0.6916, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 6.819329293131826e-06, + "loss": 0.6901, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9190687361419069, + "eval_loss": 0.8050836324691772, + "eval_runtime": 7.2761, + "eval_samples_per_second": 495.87, + "eval_steps_per_second": 7.834, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 4.9035511677119265e-06, + "loss": 0.6908, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 3.2899298383786797e-06, + "loss": 0.6954, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9226718403547672, + "eval_loss": 0.795009434223175, + "eval_runtime": 6.5383, + "eval_samples_per_second": 551.824, + "eval_steps_per_second": 8.718, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 1.988930258650284e-06, + "loss": 0.6933, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9251662971175166, + "eval_loss": 0.792511522769928, + "eval_runtime": 6.6527, + "eval_samples_per_second": 542.336, + "eval_steps_per_second": 8.568, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.0089899100358969e-06, + "loss": 0.6875, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 3.564640817303516e-07, + "loss": 0.6888, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9204545454545454, + "eval_loss": 0.7999512553215027, + "eval_runtime": 6.9363, + "eval_samples_per_second": 520.16, + "eval_steps_per_second": 8.218, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 3.558465412497068e-08, + "loss": 0.6917, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.924889135254989, + "eval_loss": 0.7944599986076355, + "eval_runtime": 7.0322, + "eval_samples_per_second": 513.07, + "eval_steps_per_second": 8.106, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.00010298795595718708, + "metric": "eval/loss", + "warmup_ratio": 0.1950587360680428 + } +} diff --git a/run-xls0dm9g/checkpoint-1260/training_args.bin b/run-xls0dm9g/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..82301c9280f896439c25119663356f6681b55be5 --- /dev/null +++ b/run-xls0dm9g/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6052f31baea3ec00afec5c05aab4ef550b66f9c051f17af532dad7afbace75fe +size 4792 diff --git a/run-xoed9f6s/checkpoint-1232/model.safetensors b/run-xoed9f6s/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..398988df22d6a9b2333c102ae301356afeb1bb1d --- /dev/null +++ b/run-xoed9f6s/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b8418d20d8db9ee2ad50cf881dd45241c0b8c60ce0027f81bde2aefaeefd937 +size 198025308 diff --git a/run-xoed9f6s/checkpoint-1232/optimizer.pt b/run-xoed9f6s/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ed95ea17d4d06133937725438e071a1149b0e39 --- /dev/null +++ b/run-xoed9f6s/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f60c1e3496e7cb8f037a4fd9635218e272a2938aba95f0601d0efc456410ccb +size 395900602 diff --git a/run-xoed9f6s/checkpoint-1232/rng_state.pth b/run-xoed9f6s/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-xoed9f6s/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-xoed9f6s/checkpoint-1232/scheduler.pt b/run-xoed9f6s/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a78773f6048bc79f3e28ea0057586b5aa7059b59 --- /dev/null +++ b/run-xoed9f6s/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e73fc03bf11915d3130bd8b37ad5a37770a327ccb1f6504526aed40a6cb3360 +size 1064 diff --git a/run-xoed9f6s/checkpoint-1232/trainer_state.json b/run-xoed9f6s/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2ace1090f1796843904b83cfec9937fef53db06e --- /dev/null +++ b/run-xoed9f6s/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9221175166297118, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-xoed9f6s/checkpoint-1147", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.340133949004229e-05, + "loss": 1.2297, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8896895787139689, + "eval_loss": 0.9474170804023743, + "eval_runtime": 6.7414, + "eval_samples_per_second": 535.203, + "eval_steps_per_second": 8.455, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00012680267898008458, + "loss": 0.87, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001902040184701269, + "loss": 0.8098, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8813747228381374, + "eval_loss": 0.8667125105857849, + "eval_runtime": 6.7993, + "eval_samples_per_second": 530.646, + "eval_steps_per_second": 8.383, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00025360535796016916, + "loss": 0.8008, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8156024217605591, + "eval_runtime": 6.499, + "eval_samples_per_second": 555.165, + "eval_steps_per_second": 8.771, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003170066974502115, + "loss": 0.7982, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0003804080369402538, + "loss": 0.7846, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7838137472283814, + "eval_loss": 0.9917212724685669, + "eval_runtime": 6.8977, + "eval_samples_per_second": 523.076, + "eval_steps_per_second": 8.264, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0004438093764302961, + "loss": 0.7872, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00045557110890671074, + "loss": 0.7979, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8960643015521065, + "eval_loss": 0.8375145196914673, + "eval_runtime": 6.8407, + "eval_samples_per_second": 527.429, + "eval_steps_per_second": 8.332, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0004538465943125304, + "loss": 0.7939, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8844235033259423, + "eval_loss": 0.852131187915802, + "eval_runtime": 6.5826, + "eval_samples_per_second": 548.11, + "eval_steps_per_second": 8.659, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00045081395563502955, + "loss": 0.7838, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0004464907582696734, + "loss": 0.777, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8381374722838137, + "eval_loss": 0.930336058139801, + "eval_runtime": 6.9304, + "eval_samples_per_second": 520.607, + "eval_steps_per_second": 8.225, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0004409020426777472, + "loss": 0.7811, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00043408017934909644, + "loss": 0.7669, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8946784922394678, + "eval_loss": 0.8406397700309753, + "eval_runtime": 6.8158, + "eval_samples_per_second": 529.357, + "eval_steps_per_second": 8.363, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00042606468130848915, + "loss": 0.7585, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8949556541019955, + "eval_loss": 0.8361960053443909, + "eval_runtime": 6.871, + "eval_samples_per_second": 525.103, + "eval_steps_per_second": 8.296, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0004169019752515875, + "loss": 0.7707, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00040664513263613365, + "loss": 0.762, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8960643015521065, + "eval_loss": 0.8347312808036804, + "eval_runtime": 7.015, + "eval_samples_per_second": 514.325, + "eval_steps_per_second": 8.125, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00039535356228590266, + "loss": 0.7555, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8949556541019955, + "eval_loss": 0.8485349416732788, + "eval_runtime": 6.7723, + "eval_samples_per_second": 532.755, + "eval_steps_per_second": 8.417, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0003830926662878941, + "loss": 0.7485, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.000369933461175848, + "loss": 0.7429, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8819290465631929, + "eval_loss": 0.874994158744812, + "eval_runtime": 6.8832, + "eval_samples_per_second": 524.172, + "eval_steps_per_second": 8.281, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0003559521665942329, + "loss": 0.7461, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00034122976382521165, + "loss": 0.7453, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8347304463386536, + "eval_runtime": 7.104, + "eval_samples_per_second": 507.886, + "eval_steps_per_second": 8.024, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0003258515267356496, + "loss": 0.7385, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9027161862527716, + "eval_loss": 0.828117311000824, + "eval_runtime": 6.9867, + "eval_samples_per_second": 516.407, + "eval_steps_per_second": 8.158, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0003099065278609722, + "loss": 0.743, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00029348712248669504, + "loss": 0.7238, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8921840354767184, + "eval_loss": 0.8398742079734802, + "eval_runtime": 6.8507, + "eval_samples_per_second": 526.661, + "eval_steps_per_second": 8.32, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00027668841371588195, + "loss": 0.7307, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0002596077016209261, + "loss": 0.7207, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8312913775444031, + "eval_runtime": 6.6832, + "eval_samples_per_second": 539.862, + "eval_steps_per_second": 8.529, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00024234391967022938, + "loss": 0.7229, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8191869854927063, + "eval_runtime": 6.7468, + "eval_samples_per_second": 534.772, + "eval_steps_per_second": 8.448, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00022499706169406167, + "loss": 0.7144, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0002076676027086822, + "loss": 0.7124, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8160085678100586, + "eval_runtime": 6.9427, + "eval_samples_per_second": 519.685, + "eval_steps_per_second": 8.21, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00019045591695337417, + "loss": 0.706, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00017346169651119288, + "loss": 0.7113, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9082594235033259, + "eval_loss": 0.8260354995727539, + "eval_runtime": 6.5192, + "eval_samples_per_second": 553.438, + "eval_steps_per_second": 8.743, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00015678337388084118, + "loss": 0.7162, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8109472990036011, + "eval_runtime": 6.7231, + "eval_samples_per_second": 536.656, + "eval_steps_per_second": 8.478, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0001405175518442036, + "loss": 0.7032, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00012475844393181377, + "loss": 0.7027, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8136425018310547, + "eval_runtime": 6.6374, + "eval_samples_per_second": 543.587, + "eval_steps_per_second": 8.588, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00010959732872714193, + "loss": 0.699, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9093680709534369, + "eval_loss": 0.8166643977165222, + "eval_runtime": 6.7096, + "eval_samples_per_second": 537.734, + "eval_steps_per_second": 8.495, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 9.512202117043738e-05, + "loss": 0.7011, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 8.141636392439516e-05, + "loss": 0.6961, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8066996335983276, + "eval_runtime": 6.7695, + "eval_samples_per_second": 532.976, + "eval_steps_per_second": 8.42, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 6.855974174771499e-05, + "loss": 0.6963, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 5.6626621689358186e-05, + "loss": 0.6933, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.8095411658287048, + "eval_runtime": 6.3965, + "eval_samples_per_second": 564.061, + "eval_steps_per_second": 8.911, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 4.568612176675087e-05, + "loss": 0.6908, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8076213598251343, + "eval_runtime": 6.87, + "eval_samples_per_second": 525.182, + "eval_steps_per_second": 8.297, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 3.5801610626198495e-05, + "loss": 0.6903, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.7030340504326517e-05, + "loss": 0.6887, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8136608600616455, + "eval_runtime": 7.0585, + "eval_samples_per_second": 511.154, + "eval_steps_per_second": 8.075, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 1.942311561647571e-05, + "loss": 0.6899, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.3023997892785923e-05, + "loss": 0.6901, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9221175166297118, + "eval_loss": 0.8040250539779663, + "eval_runtime": 6.8164, + "eval_samples_per_second": 529.309, + "eval_steps_per_second": 8.362, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 7.870051766379023e-06, + "loss": 0.6905, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9210088691796009, + "eval_loss": 0.8067538142204285, + "eval_runtime": 7.0176, + "eval_samples_per_second": 514.137, + "eval_steps_per_second": 8.122, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 3.991129491856849e-06, + "loss": 0.684, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.4096982375755581e-06, + "loss": 0.6859, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8104121088981628, + "eval_runtime": 6.6859, + "eval_samples_per_second": 539.64, + "eval_steps_per_second": 8.525, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0004560019417168427, + "metric": "eval/loss", + "warmup_ratio": 0.1481748548597098 + } +} diff --git a/run-xoed9f6s/checkpoint-1232/training_args.bin b/run-xoed9f6s/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2be8352a0dfb8248dbd93fa3d7287a1936dbbfb1 --- /dev/null +++ b/run-xoed9f6s/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b98654d9d5c22040b9db0d2aa26c027fe824a0d4fb953de3b288a6128a77c02 +size 4792 diff --git a/run-xoed9f6s/checkpoint-1260/model.safetensors b/run-xoed9f6s/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0265f9d7ef2cc916f95e10a853809e6cb0096ee --- /dev/null +++ b/run-xoed9f6s/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5512098920ab44cba0dcd511d8a1912dec53467ff46ea5e02df07d07ea4d454 +size 198025308 diff --git a/run-xoed9f6s/checkpoint-1260/optimizer.pt b/run-xoed9f6s/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5cfe89ea0381b2e9e090fba48a7df7470e38b500 --- /dev/null +++ b/run-xoed9f6s/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:455b2aec9056e556d9fef5e517df22cdf41caabb3f573adadaaee7b1325b8e47 +size 395900602 diff --git a/run-xoed9f6s/checkpoint-1260/rng_state.pth b/run-xoed9f6s/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-xoed9f6s/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-xoed9f6s/checkpoint-1260/scheduler.pt b/run-xoed9f6s/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8df01c6c974b6b626a5fec28d229c58fcbb909ed --- /dev/null +++ b/run-xoed9f6s/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb298f10d8af8e20534f4233acc13a631cd1b34d7652d966e4893d1e06292eaa +size 1064 diff --git a/run-xoed9f6s/checkpoint-1260/trainer_state.json b/run-xoed9f6s/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..99972466e8e88b8c3f956150ac3b795d11693085 --- /dev/null +++ b/run-xoed9f6s/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9273835920177383, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-xoed9f6s/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 6.340133949004229e-05, + "loss": 1.2297, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8896895787139689, + "eval_loss": 0.9474170804023743, + "eval_runtime": 6.7414, + "eval_samples_per_second": 535.203, + "eval_steps_per_second": 8.455, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 0.00012680267898008458, + "loss": 0.87, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001902040184701269, + "loss": 0.8098, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8813747228381374, + "eval_loss": 0.8667125105857849, + "eval_runtime": 6.7993, + "eval_samples_per_second": 530.646, + "eval_steps_per_second": 8.383, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 0.00025360535796016916, + "loss": 0.8008, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.9115853658536586, + "eval_loss": 0.8156024217605591, + "eval_runtime": 6.499, + "eval_samples_per_second": 555.165, + "eval_steps_per_second": 8.771, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003170066974502115, + "loss": 0.7982, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 0.0003804080369402538, + "loss": 0.7846, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7838137472283814, + "eval_loss": 0.9917212724685669, + "eval_runtime": 6.8977, + "eval_samples_per_second": 523.076, + "eval_steps_per_second": 8.264, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0004438093764302961, + "loss": 0.7872, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 0.00045557110890671074, + "loss": 0.7979, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8960643015521065, + "eval_loss": 0.8375145196914673, + "eval_runtime": 6.8407, + "eval_samples_per_second": 527.429, + "eval_steps_per_second": 8.332, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 0.0004538465943125304, + "loss": 0.7939, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8844235033259423, + "eval_loss": 0.852131187915802, + "eval_runtime": 6.5826, + "eval_samples_per_second": 548.11, + "eval_steps_per_second": 8.659, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 0.00045081395563502955, + "loss": 0.7838, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 0.0004464907582696734, + "loss": 0.777, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8381374722838137, + "eval_loss": 0.930336058139801, + "eval_runtime": 6.9304, + "eval_samples_per_second": 520.607, + "eval_steps_per_second": 8.225, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 0.0004409020426777472, + "loss": 0.7811, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 0.00043408017934909644, + "loss": 0.7669, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8946784922394678, + "eval_loss": 0.8406397700309753, + "eval_runtime": 6.8158, + "eval_samples_per_second": 529.357, + "eval_steps_per_second": 8.363, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 0.00042606468130848915, + "loss": 0.7585, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8949556541019955, + "eval_loss": 0.8361960053443909, + "eval_runtime": 6.871, + "eval_samples_per_second": 525.103, + "eval_steps_per_second": 8.296, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 0.0004169019752515875, + "loss": 0.7707, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 0.00040664513263613365, + "loss": 0.762, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8960643015521065, + "eval_loss": 0.8347312808036804, + "eval_runtime": 7.015, + "eval_samples_per_second": 514.325, + "eval_steps_per_second": 8.125, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 0.00039535356228590266, + "loss": 0.7555, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8949556541019955, + "eval_loss": 0.8485349416732788, + "eval_runtime": 6.7723, + "eval_samples_per_second": 532.755, + "eval_steps_per_second": 8.417, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 0.0003830926662878941, + "loss": 0.7485, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 0.000369933461175848, + "loss": 0.7429, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8819290465631929, + "eval_loss": 0.874994158744812, + "eval_runtime": 6.8832, + "eval_samples_per_second": 524.172, + "eval_steps_per_second": 8.281, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 0.0003559521665942329, + "loss": 0.7461, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 0.00034122976382521165, + "loss": 0.7453, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8347304463386536, + "eval_runtime": 7.104, + "eval_samples_per_second": 507.886, + "eval_steps_per_second": 8.024, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 0.0003258515267356496, + "loss": 0.7385, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.9027161862527716, + "eval_loss": 0.828117311000824, + "eval_runtime": 6.9867, + "eval_samples_per_second": 516.407, + "eval_steps_per_second": 8.158, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 0.0003099065278609722, + "loss": 0.743, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 0.00029348712248669504, + "loss": 0.7238, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8921840354767184, + "eval_loss": 0.8398742079734802, + "eval_runtime": 6.8507, + "eval_samples_per_second": 526.661, + "eval_steps_per_second": 8.32, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 0.00027668841371588195, + "loss": 0.7307, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 0.0002596077016209261, + "loss": 0.7207, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.904379157427938, + "eval_loss": 0.8312913775444031, + "eval_runtime": 6.6832, + "eval_samples_per_second": 539.862, + "eval_steps_per_second": 8.529, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 0.00024234391967022938, + "loss": 0.7229, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9096452328159645, + "eval_loss": 0.8191869854927063, + "eval_runtime": 6.7468, + "eval_samples_per_second": 534.772, + "eval_steps_per_second": 8.448, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 0.00022499706169406167, + "loss": 0.7144, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 0.0002076676027086822, + "loss": 0.7124, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9162971175166297, + "eval_loss": 0.8160085678100586, + "eval_runtime": 6.9427, + "eval_samples_per_second": 519.685, + "eval_steps_per_second": 8.21, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 0.00019045591695337417, + "loss": 0.706, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 0.00017346169651119288, + "loss": 0.7113, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9082594235033259, + "eval_loss": 0.8260354995727539, + "eval_runtime": 6.5192, + "eval_samples_per_second": 553.438, + "eval_steps_per_second": 8.743, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 0.00015678337388084118, + "loss": 0.7162, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8109472990036011, + "eval_runtime": 6.7231, + "eval_samples_per_second": 536.656, + "eval_steps_per_second": 8.478, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 0.0001405175518442036, + "loss": 0.7032, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 0.00012475844393181377, + "loss": 0.7027, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.8136425018310547, + "eval_runtime": 6.6374, + "eval_samples_per_second": 543.587, + "eval_steps_per_second": 8.588, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 0.00010959732872714193, + "loss": 0.699, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9093680709534369, + "eval_loss": 0.8166643977165222, + "eval_runtime": 6.7096, + "eval_samples_per_second": 537.734, + "eval_steps_per_second": 8.495, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 9.512202117043738e-05, + "loss": 0.7011, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 8.141636392439516e-05, + "loss": 0.6961, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8066996335983276, + "eval_runtime": 6.7695, + "eval_samples_per_second": 532.976, + "eval_steps_per_second": 8.42, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 6.855974174771499e-05, + "loss": 0.6963, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 5.6626621689358186e-05, + "loss": 0.6933, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9135254988913526, + "eval_loss": 0.8095411658287048, + "eval_runtime": 6.3965, + "eval_samples_per_second": 564.061, + "eval_steps_per_second": 8.911, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 4.568612176675087e-05, + "loss": 0.6908, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.8076213598251343, + "eval_runtime": 6.87, + "eval_samples_per_second": 525.182, + "eval_steps_per_second": 8.297, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 3.5801610626198495e-05, + "loss": 0.6903, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 2.7030340504326517e-05, + "loss": 0.6887, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8136608600616455, + "eval_runtime": 7.0585, + "eval_samples_per_second": 511.154, + "eval_steps_per_second": 8.075, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 1.942311561647571e-05, + "loss": 0.6899, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 1.3023997892785923e-05, + "loss": 0.6901, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9221175166297118, + "eval_loss": 0.8040250539779663, + "eval_runtime": 6.8164, + "eval_samples_per_second": 529.309, + "eval_steps_per_second": 8.362, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 7.870051766379023e-06, + "loss": 0.6905, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9210088691796009, + "eval_loss": 0.8067538142204285, + "eval_runtime": 7.0176, + "eval_samples_per_second": 514.137, + "eval_steps_per_second": 8.122, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 3.991129491856849e-06, + "loss": 0.684, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 1.4096982375755581e-06, + "loss": 0.6859, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9182372505543237, + "eval_loss": 0.8104121088981628, + "eval_runtime": 6.6859, + "eval_samples_per_second": 539.64, + "eval_steps_per_second": 8.525, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 1.4070995319664157e-07, + "loss": 0.6909, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9273835920177383, + "eval_loss": 0.7906843423843384, + "eval_runtime": 7.0789, + "eval_samples_per_second": 509.68, + "eval_steps_per_second": 8.052, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 0.0004560019417168427, + "metric": "eval/loss", + "warmup_ratio": 0.1481748548597098 + } +} diff --git a/run-xoed9f6s/checkpoint-1260/training_args.bin b/run-xoed9f6s/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2be8352a0dfb8248dbd93fa3d7287a1936dbbfb1 --- /dev/null +++ b/run-xoed9f6s/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b98654d9d5c22040b9db0d2aa26c027fe824a0d4fb953de3b288a6128a77c02 +size 4792 diff --git a/run-xq4st66k/checkpoint-573/model.safetensors b/run-xq4st66k/checkpoint-573/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0cbfd099bf080da3c0242dfa3f88b08fdaed7598 --- /dev/null +++ b/run-xq4st66k/checkpoint-573/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de2588a54d632ca54fb6d732afc53c7879fde32e715dc95f03facd683b9bcc94 +size 198025308 diff --git a/run-xq4st66k/checkpoint-573/optimizer.pt b/run-xq4st66k/checkpoint-573/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..556f85cf8ae458f00654c5a2553eb796018ecebd --- /dev/null +++ b/run-xq4st66k/checkpoint-573/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:544f3168a1141e0a48ac01a629ee4e1e40778df4f8d2e0903dfc16e3a36c81df +size 395900602 diff --git a/run-xq4st66k/checkpoint-573/rng_state.pth b/run-xq4st66k/checkpoint-573/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e8713f9caaf617efce0d3935618a93ad2f5f391 --- /dev/null +++ b/run-xq4st66k/checkpoint-573/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9102bb312b12c2313ea7585eb813beef5c548592778aaea4ab0516e14ecd38e5 +size 14244 diff --git a/run-xq4st66k/checkpoint-573/scheduler.pt b/run-xq4st66k/checkpoint-573/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d01bf88482a0a4dcb4ba14655123d0ddc06da561 --- /dev/null +++ b/run-xq4st66k/checkpoint-573/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:426dada6af22dba2dcaf779812c7381619a92973e3c4457d0081f09f77e8cc34 +size 1064 diff --git a/run-xq4st66k/checkpoint-573/trainer_state.json b/run-xq4st66k/checkpoint-573/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6c585a975f80739dc7bf363d5d7913ecb683505c --- /dev/null +++ b/run-xq4st66k/checkpoint-573/trainer_state.json @@ -0,0 +1,616 @@ +{ + "best_metric": 0.9190892767305735, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-xq4st66k/checkpoint-573", + "epoch": 26.96470588235294, + "eval_steps": 500, + "global_step": 573, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.7731728936378977e-05, + "loss": 1.4645, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7508179552480237, + "eval_loss": 1.013211727142334, + "eval_precision": 0.6864978143631778, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.7686, + "eval_samples_per_second": 411.467, + "eval_steps_per_second": 3.307, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.5463457872757954e-05, + "loss": 1.1051, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 5.3195186809136934e-05, + "loss": 0.925, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8725055432372506, + "eval_f1": 0.8468320821800018, + "eval_loss": 0.9056642651557922, + "eval_precision": 0.8706877853508009, + "eval_recall": 0.8725055432372506, + "eval_runtime": 8.5699, + "eval_samples_per_second": 421.01, + "eval_steps_per_second": 3.384, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 7.092691574551591e-05, + "loss": 0.8512, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9046563192904656, + "eval_f1": 0.8961296226377654, + "eval_loss": 0.8541654944419861, + "eval_precision": 0.8923134128173366, + "eval_recall": 0.9046563192904656, + "eval_runtime": 8.5607, + "eval_samples_per_second": 421.461, + "eval_steps_per_second": 3.388, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 8.86586446818949e-05, + "loss": 0.8239, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00010639037361827387, + "loss": 0.7976, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8899667405764967, + "eval_f1": 0.8921350020416047, + "eval_loss": 0.8694923520088196, + "eval_precision": 0.9004613707101828, + "eval_recall": 0.8899667405764967, + "eval_runtime": 8.3164, + "eval_samples_per_second": 433.841, + "eval_steps_per_second": 3.487, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012412210255465284, + "loss": 0.7928, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00014185383149103181, + "loss": 0.7795, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9052480046101129, + "eval_loss": 0.8080280423164368, + "eval_precision": 0.9016586377955557, + "eval_recall": 0.9104767184035477, + "eval_runtime": 8.4299, + "eval_samples_per_second": 427.999, + "eval_steps_per_second": 3.44, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0001595855604274108, + "loss": 0.7643, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9016075388026608, + "eval_f1": 0.9022655213894499, + "eval_loss": 0.825640857219696, + "eval_precision": 0.9079668249279793, + "eval_recall": 0.9016075388026608, + "eval_runtime": 8.3677, + "eval_samples_per_second": 431.181, + "eval_steps_per_second": 3.466, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0001718346639639705, + "loss": 0.7679, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00017137937128375267, + "loss": 0.7566, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9044406345726159, + "eval_loss": 0.8208711743354797, + "eval_precision": 0.9039371410631817, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.2702, + "eval_samples_per_second": 436.264, + "eval_steps_per_second": 3.507, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00017036329784682366, + "loss": 0.7507, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001687931119176688, + "loss": 0.7452, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8048780487804879, + "eval_f1": 0.827684648610607, + "eval_loss": 0.97055983543396, + "eval_precision": 0.8841573161615661, + "eval_recall": 0.8048780487804879, + "eval_runtime": 8.2344, + "eval_samples_per_second": 438.16, + "eval_steps_per_second": 3.522, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00016667911827809436, + "loss": 0.7413, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.8926736079767494, + "eval_loss": 0.8262310028076172, + "eval_precision": 0.8965465835519577, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.0094, + "eval_samples_per_second": 450.469, + "eval_steps_per_second": 3.621, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00016403519059923153, + "loss": 0.7403, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00016087868039171122, + "loss": 0.7328, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9077050997782705, + "eval_f1": 0.9026280326557036, + "eval_loss": 0.814792811870575, + "eval_precision": 0.9019345277771287, + "eval_recall": 0.9077050997782705, + "eval_runtime": 8.1626, + "eval_samples_per_second": 442.014, + "eval_steps_per_second": 3.553, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00015723030313155058, + "loss": 0.7265, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8032150776053215, + "eval_f1": 0.8253239119452892, + "eval_loss": 0.9775307178497314, + "eval_precision": 0.8819540695233256, + "eval_recall": 0.8032150776053215, + "eval_runtime": 8.1381, + "eval_samples_per_second": 443.348, + "eval_steps_per_second": 3.563, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00015311400230908103, + "loss": 0.7255, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00014855679229313425, + "loss": 0.7191, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9027161862527716, + "eval_f1": 0.9020455345543501, + "eval_loss": 0.8215241432189941, + "eval_precision": 0.902219267264671, + "eval_recall": 0.9027161862527716, + "eval_runtime": 8.0129, + "eval_samples_per_second": 450.275, + "eval_steps_per_second": 3.619, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00014358858104173488, + "loss": 0.7211, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00013824197382280865, + "loss": 0.7161, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.905467267502716, + "eval_loss": 0.8100427389144897, + "eval_precision": 0.9031969284870455, + "eval_recall": 0.914079822616408, + "eval_runtime": 8.0406, + "eval_samples_per_second": 448.725, + "eval_steps_per_second": 3.607, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0001325520592330447, + "loss": 0.715, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.909741656917909, + "eval_loss": 0.8092275261878967, + "eval_precision": 0.9085132429911835, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.2039, + "eval_samples_per_second": 439.79, + "eval_steps_per_second": 3.535, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00012655617891922224, + "loss": 0.7095, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00012029368251326915, + "loss": 0.7073, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.9048851136799948, + "eval_loss": 0.8166700005531311, + "eval_precision": 0.9084258765593346, + "eval_recall": 0.907150776053215, + "eval_runtime": 8.148, + "eval_samples_per_second": 442.81, + "eval_steps_per_second": 3.559, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00011380566938936022, + "loss": 0.7098, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00010713471893784464, + "loss": 0.7082, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9081498102023406, + "eval_loss": 0.8111467957496643, + "eval_precision": 0.907444243680597, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.2995, + "eval_samples_per_second": 434.723, + "eval_steps_per_second": 3.494, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0001003246111261564, + "loss": 0.7041, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9032705099778271, + "eval_f1": 0.9042470838058223, + "eval_loss": 0.8228284120559692, + "eval_precision": 0.9085181635800026, + "eval_recall": 0.9032705099778271, + "eval_runtime": 8.2614, + "eval_samples_per_second": 436.731, + "eval_steps_per_second": 3.51, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 9.34200391806032e-05, + "loss": 0.7034, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 8.646631627464037e-05, + "loss": 0.7, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9149979761575759, + "eval_loss": 0.8076518774032593, + "eval_precision": 0.9123128982131027, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.0083, + "eval_samples_per_second": 450.534, + "eval_steps_per_second": 3.621, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 7.950907814856916e-05, + "loss": 0.7013, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 7.259398361230024e-05, + "loss": 0.6958, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9089317707889862, + "eval_loss": 0.816810131072998, + "eval_precision": 0.906904843808059, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.2369, + "eval_samples_per_second": 438.03, + "eval_steps_per_second": 3.521, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 6.576641489671676e-05, + "loss": 0.7042, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9119169671633489, + "eval_loss": 0.8107531666755676, + "eval_precision": 0.9108203828671632, + "eval_recall": 0.914079822616408, + "eval_runtime": 8.0351, + "eval_samples_per_second": 449.032, + "eval_steps_per_second": 3.609, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 5.907117982016383e-05, + "loss": 0.6972, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 5.255221772468043e-05, + "loss": 0.692, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9117120331070109, + "eval_loss": 0.8130176067352295, + "eval_precision": 0.9100411281740735, + "eval_recall": 0.9171286031042128, + "eval_runtime": 8.4219, + "eval_samples_per_second": 428.407, + "eval_steps_per_second": 3.443, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 4.6252311111848256e-05, + "loss": 0.6937, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9063046781041995, + "eval_loss": 0.8149348497390747, + "eval_precision": 0.9043660682794047, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.336, + "eval_samples_per_second": 432.822, + "eval_steps_per_second": 3.479, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 4.0212804870727854e-05, + "loss": 0.6939, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 3.447333494052686e-05, + "loss": 0.6902, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9105057204776024, + "eval_loss": 0.8112707138061523, + "eval_precision": 0.9096932977502198, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.3806, + "eval_samples_per_second": 430.517, + "eval_steps_per_second": 3.46, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.90715681887267e-05, + "loss": 0.6933, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 2.404295521179004e-05, + "loss": 0.692, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9159589580250473, + "eval_loss": 0.8008630871772766, + "eval_precision": 0.9149292430466959, + "eval_recall": 0.9204545454545454, + "eval_runtime": 7.7418, + "eval_samples_per_second": 466.04, + "eval_steps_per_second": 3.746, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.942049768076337e-05, + "loss": 0.6921, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9113776927146895, + "eval_loss": 0.8102603554725647, + "eval_precision": 0.9110668991474122, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.7918, + "eval_samples_per_second": 463.054, + "eval_steps_per_second": 3.722, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.5234531758633342e-05, + "loss": 0.6894, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.1512529010821149e-05, + "loss": 0.6883, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.914658851229938, + "eval_loss": 0.8015871047973633, + "eval_precision": 0.9124074698408369, + "eval_recall": 0.91990022172949, + "eval_runtime": 8.0276, + "eval_samples_per_second": 449.449, + "eval_steps_per_second": 3.613, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 8.27891611539504e-06, + "loss": 0.6878, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 5.554914556202672e-06, + "loss": 0.6849, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9190892767305735, + "eval_loss": 0.8035950660705566, + "eval_precision": 0.9179518881638368, + "eval_recall": 0.9223946784922394, + "eval_runtime": 7.7448, + "eval_samples_per_second": 465.863, + "eval_steps_per_second": 3.744, + "step": 573 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.012617254657911534, + "learning_rate": 0.00017186137276798086, + "metric": "eval/loss", + "weight_decay": 0.02524780914832015 + } +} diff --git a/run-xq4st66k/checkpoint-573/training_args.bin b/run-xq4st66k/checkpoint-573/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..19d2224b0447ecd25364dec9d9bcb9ede22ec326 --- /dev/null +++ b/run-xq4st66k/checkpoint-573/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8931ab304ae22cece74dfbb51142618a94356ca70787060897bbd65cc0ddf1d +size 4792 diff --git a/run-xq4st66k/checkpoint-630/model.safetensors b/run-xq4st66k/checkpoint-630/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3dcc0cdf25abe12093a4a9fe285a1f8ff68cd8a0 --- /dev/null +++ b/run-xq4st66k/checkpoint-630/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07cecc82ab839bced809611fffe56106b9562f500cff32cb7807ed88482ab135 +size 198025308 diff --git a/run-xq4st66k/checkpoint-630/optimizer.pt b/run-xq4st66k/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0f5442ff535fbe3aa61b6b55a5a777593e6163f --- /dev/null +++ b/run-xq4st66k/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6817048b2ef965f47085ea3581df2f35a3b8a14296b044165e3a6512f0ffef47 +size 395900602 diff --git a/run-xq4st66k/checkpoint-630/rng_state.pth b/run-xq4st66k/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4fa305822d9bf83b96562cf1fc1ae5d411429d --- /dev/null +++ b/run-xq4st66k/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf0590ccc7857c323b9b2e426a89e450fa4acc7c473c28a6f6c37cc9f950c6c +size 14244 diff --git a/run-xq4st66k/checkpoint-630/scheduler.pt b/run-xq4st66k/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b535d640ca1aee04bdb4f1be8f4454bdc0d73d4 --- /dev/null +++ b/run-xq4st66k/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ac3b41ab14393d0f11423d27993c93c184288b8575337a99d770fa9243121b4 +size 1064 diff --git a/run-xq4st66k/checkpoint-630/trainer_state.json b/run-xq4st66k/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7702c7c15ee2cf6f8e12c32da5d93a7b8567cef3 --- /dev/null +++ b/run-xq4st66k/checkpoint-630/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": 0.9190892767305735, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-xq4st66k/checkpoint-573", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 1.7731728936378977e-05, + "loss": 1.4645, + "step": 13 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.8284368070953437, + "eval_f1": 0.7508179552480237, + "eval_loss": 1.013211727142334, + "eval_precision": 0.6864978143631778, + "eval_recall": 0.8284368070953437, + "eval_runtime": 8.7686, + "eval_samples_per_second": 411.467, + "eval_steps_per_second": 3.307, + "step": 21 + }, + { + "epoch": 1.22, + "learning_rate": 3.5463457872757954e-05, + "loss": 1.1051, + "step": 26 + }, + { + "epoch": 1.84, + "learning_rate": 5.3195186809136934e-05, + "loss": 0.925, + "step": 39 + }, + { + "epoch": 1.98, + "eval_accuracy": 0.8725055432372506, + "eval_f1": 0.8468320821800018, + "eval_loss": 0.9056642651557922, + "eval_precision": 0.8706877853508009, + "eval_recall": 0.8725055432372506, + "eval_runtime": 8.5699, + "eval_samples_per_second": 421.01, + "eval_steps_per_second": 3.384, + "step": 42 + }, + { + "epoch": 2.45, + "learning_rate": 7.092691574551591e-05, + "loss": 0.8512, + "step": 52 + }, + { + "epoch": 2.96, + "eval_accuracy": 0.9046563192904656, + "eval_f1": 0.8961296226377654, + "eval_loss": 0.8541654944419861, + "eval_precision": 0.8923134128173366, + "eval_recall": 0.9046563192904656, + "eval_runtime": 8.5607, + "eval_samples_per_second": 421.461, + "eval_steps_per_second": 3.388, + "step": 63 + }, + { + "epoch": 3.06, + "learning_rate": 8.86586446818949e-05, + "loss": 0.8239, + "step": 65 + }, + { + "epoch": 3.67, + "learning_rate": 0.00010639037361827387, + "loss": 0.7976, + "step": 78 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8899667405764967, + "eval_f1": 0.8921350020416047, + "eval_loss": 0.8694923520088196, + "eval_precision": 0.9004613707101828, + "eval_recall": 0.8899667405764967, + "eval_runtime": 8.3164, + "eval_samples_per_second": 433.841, + "eval_steps_per_second": 3.487, + "step": 85 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012412210255465284, + "loss": 0.7928, + "step": 91 + }, + { + "epoch": 4.89, + "learning_rate": 0.00014185383149103181, + "loss": 0.7795, + "step": 104 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9104767184035477, + "eval_f1": 0.9052480046101129, + "eval_loss": 0.8080280423164368, + "eval_precision": 0.9016586377955557, + "eval_recall": 0.9104767184035477, + "eval_runtime": 8.4299, + "eval_samples_per_second": 427.999, + "eval_steps_per_second": 3.44, + "step": 106 + }, + { + "epoch": 5.51, + "learning_rate": 0.0001595855604274108, + "loss": 0.7643, + "step": 117 + }, + { + "epoch": 5.98, + "eval_accuracy": 0.9016075388026608, + "eval_f1": 0.9022655213894499, + "eval_loss": 0.825640857219696, + "eval_precision": 0.9079668249279793, + "eval_recall": 0.9016075388026608, + "eval_runtime": 8.3677, + "eval_samples_per_second": 431.181, + "eval_steps_per_second": 3.466, + "step": 127 + }, + { + "epoch": 6.12, + "learning_rate": 0.0001718346639639705, + "loss": 0.7679, + "step": 130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00017137937128375267, + "loss": 0.7566, + "step": 143 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9099223946784922, + "eval_f1": 0.9044406345726159, + "eval_loss": 0.8208711743354797, + "eval_precision": 0.9039371410631817, + "eval_recall": 0.9099223946784922, + "eval_runtime": 8.2702, + "eval_samples_per_second": 436.264, + "eval_steps_per_second": 3.507, + "step": 148 + }, + { + "epoch": 7.34, + "learning_rate": 0.00017036329784682366, + "loss": 0.7507, + "step": 156 + }, + { + "epoch": 7.95, + "learning_rate": 0.0001687931119176688, + "loss": 0.7452, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8048780487804879, + "eval_f1": 0.827684648610607, + "eval_loss": 0.97055983543396, + "eval_precision": 0.8841573161615661, + "eval_recall": 0.8048780487804879, + "eval_runtime": 8.2344, + "eval_samples_per_second": 438.16, + "eval_steps_per_second": 3.522, + "step": 170 + }, + { + "epoch": 8.56, + "learning_rate": 0.00016667911827809436, + "loss": 0.7413, + "step": 182 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.906319290465632, + "eval_f1": 0.8926736079767494, + "eval_loss": 0.8262310028076172, + "eval_precision": 0.8965465835519577, + "eval_recall": 0.906319290465632, + "eval_runtime": 8.0094, + "eval_samples_per_second": 450.469, + "eval_steps_per_second": 3.621, + "step": 191 + }, + { + "epoch": 9.18, + "learning_rate": 0.00016403519059923153, + "loss": 0.7403, + "step": 195 + }, + { + "epoch": 9.79, + "learning_rate": 0.00016087868039171122, + "loss": 0.7328, + "step": 208 + }, + { + "epoch": 9.98, + "eval_accuracy": 0.9077050997782705, + "eval_f1": 0.9026280326557036, + "eval_loss": 0.814792811870575, + "eval_precision": 0.9019345277771287, + "eval_recall": 0.9077050997782705, + "eval_runtime": 8.1626, + "eval_samples_per_second": 442.014, + "eval_steps_per_second": 3.553, + "step": 212 + }, + { + "epoch": 10.4, + "learning_rate": 0.00015723030313155058, + "loss": 0.7265, + "step": 221 + }, + { + "epoch": 10.96, + "eval_accuracy": 0.8032150776053215, + "eval_f1": 0.8253239119452892, + "eval_loss": 0.9775307178497314, + "eval_precision": 0.8819540695233256, + "eval_recall": 0.8032150776053215, + "eval_runtime": 8.1381, + "eval_samples_per_second": 443.348, + "eval_steps_per_second": 3.563, + "step": 233 + }, + { + "epoch": 11.01, + "learning_rate": 0.00015311400230908103, + "loss": 0.7255, + "step": 234 + }, + { + "epoch": 11.62, + "learning_rate": 0.00014855679229313425, + "loss": 0.7191, + "step": 247 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9027161862527716, + "eval_f1": 0.9020455345543501, + "eval_loss": 0.8215241432189941, + "eval_precision": 0.902219267264671, + "eval_recall": 0.9027161862527716, + "eval_runtime": 8.0129, + "eval_samples_per_second": 450.275, + "eval_steps_per_second": 3.619, + "step": 255 + }, + { + "epoch": 12.24, + "learning_rate": 0.00014358858104173488, + "loss": 0.7211, + "step": 260 + }, + { + "epoch": 12.85, + "learning_rate": 0.00013824197382280865, + "loss": 0.7161, + "step": 273 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.905467267502716, + "eval_loss": 0.8100427389144897, + "eval_precision": 0.9031969284870455, + "eval_recall": 0.914079822616408, + "eval_runtime": 8.0406, + "eval_samples_per_second": 448.725, + "eval_steps_per_second": 3.607, + "step": 276 + }, + { + "epoch": 13.46, + "learning_rate": 0.0001325520592330447, + "loss": 0.715, + "step": 286 + }, + { + "epoch": 13.98, + "eval_accuracy": 0.9146341463414634, + "eval_f1": 0.909741656917909, + "eval_loss": 0.8092275261878967, + "eval_precision": 0.9085132429911835, + "eval_recall": 0.9146341463414634, + "eval_runtime": 8.2039, + "eval_samples_per_second": 439.79, + "eval_steps_per_second": 3.535, + "step": 297 + }, + { + "epoch": 14.07, + "learning_rate": 0.00012655617891922224, + "loss": 0.7095, + "step": 299 + }, + { + "epoch": 14.68, + "learning_rate": 0.00012029368251326915, + "loss": 0.7073, + "step": 312 + }, + { + "epoch": 14.96, + "eval_accuracy": 0.907150776053215, + "eval_f1": 0.9048851136799948, + "eval_loss": 0.8166700005531311, + "eval_precision": 0.9084258765593346, + "eval_recall": 0.907150776053215, + "eval_runtime": 8.148, + "eval_samples_per_second": 442.81, + "eval_steps_per_second": 3.559, + "step": 318 + }, + { + "epoch": 15.29, + "learning_rate": 0.00011380566938936022, + "loss": 0.7098, + "step": 325 + }, + { + "epoch": 15.91, + "learning_rate": 0.00010713471893784464, + "loss": 0.7082, + "step": 338 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9174057649667405, + "eval_f1": 0.9081498102023406, + "eval_loss": 0.8111467957496643, + "eval_precision": 0.907444243680597, + "eval_recall": 0.9174057649667405, + "eval_runtime": 8.2995, + "eval_samples_per_second": 434.723, + "eval_steps_per_second": 3.494, + "step": 340 + }, + { + "epoch": 16.52, + "learning_rate": 0.0001003246111261564, + "loss": 0.7041, + "step": 351 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9032705099778271, + "eval_f1": 0.9042470838058223, + "eval_loss": 0.8228284120559692, + "eval_precision": 0.9085181635800026, + "eval_recall": 0.9032705099778271, + "eval_runtime": 8.2614, + "eval_samples_per_second": 436.731, + "eval_steps_per_second": 3.51, + "step": 361 + }, + { + "epoch": 17.13, + "learning_rate": 9.34200391806032e-05, + "loss": 0.7034, + "step": 364 + }, + { + "epoch": 17.74, + "learning_rate": 8.646631627464037e-05, + "loss": 0.7, + "step": 377 + }, + { + "epoch": 17.98, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9149979761575759, + "eval_loss": 0.8076518774032593, + "eval_precision": 0.9123128982131027, + "eval_recall": 0.9204545454545454, + "eval_runtime": 8.0083, + "eval_samples_per_second": 450.534, + "eval_steps_per_second": 3.621, + "step": 382 + }, + { + "epoch": 18.35, + "learning_rate": 7.950907814856916e-05, + "loss": 0.7013, + "step": 390 + }, + { + "epoch": 18.96, + "learning_rate": 7.259398361230024e-05, + "loss": 0.6958, + "step": 403 + }, + { + "epoch": 18.96, + "eval_accuracy": 0.9132483370288248, + "eval_f1": 0.9089317707889862, + "eval_loss": 0.816810131072998, + "eval_precision": 0.906904843808059, + "eval_recall": 0.9132483370288248, + "eval_runtime": 8.2369, + "eval_samples_per_second": 438.03, + "eval_steps_per_second": 3.521, + "step": 403 + }, + { + "epoch": 19.58, + "learning_rate": 6.576641489671676e-05, + "loss": 0.7042, + "step": 416 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.914079822616408, + "eval_f1": 0.9119169671633489, + "eval_loss": 0.8107531666755676, + "eval_precision": 0.9108203828671632, + "eval_recall": 0.914079822616408, + "eval_runtime": 8.0351, + "eval_samples_per_second": 449.032, + "eval_steps_per_second": 3.609, + "step": 425 + }, + { + "epoch": 20.19, + "learning_rate": 5.907117982016383e-05, + "loss": 0.6972, + "step": 429 + }, + { + "epoch": 20.8, + "learning_rate": 5.255221772468043e-05, + "loss": 0.692, + "step": 442 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9117120331070109, + "eval_loss": 0.8130176067352295, + "eval_precision": 0.9100411281740735, + "eval_recall": 0.9171286031042128, + "eval_runtime": 8.4219, + "eval_samples_per_second": 428.407, + "eval_steps_per_second": 3.443, + "step": 446 + }, + { + "epoch": 21.41, + "learning_rate": 4.6252311111848256e-05, + "loss": 0.6937, + "step": 455 + }, + { + "epoch": 21.98, + "eval_accuracy": 0.9118625277161863, + "eval_f1": 0.9063046781041995, + "eval_loss": 0.8149348497390747, + "eval_precision": 0.9043660682794047, + "eval_recall": 0.9118625277161863, + "eval_runtime": 8.336, + "eval_samples_per_second": 432.822, + "eval_steps_per_second": 3.479, + "step": 467 + }, + { + "epoch": 22.02, + "learning_rate": 4.0212804870727854e-05, + "loss": 0.6939, + "step": 468 + }, + { + "epoch": 22.64, + "learning_rate": 3.447333494052686e-05, + "loss": 0.6902, + "step": 481 + }, + { + "epoch": 22.96, + "eval_accuracy": 0.9149113082039911, + "eval_f1": 0.9105057204776024, + "eval_loss": 0.8112707138061523, + "eval_precision": 0.9096932977502198, + "eval_recall": 0.9149113082039911, + "eval_runtime": 8.3806, + "eval_samples_per_second": 430.517, + "eval_steps_per_second": 3.46, + "step": 488 + }, + { + "epoch": 23.25, + "learning_rate": 2.90715681887267e-05, + "loss": 0.6933, + "step": 494 + }, + { + "epoch": 23.86, + "learning_rate": 2.404295521179004e-05, + "loss": 0.692, + "step": 507 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9204545454545454, + "eval_f1": 0.9159589580250473, + "eval_loss": 0.8008630871772766, + "eval_precision": 0.9149292430466959, + "eval_recall": 0.9204545454545454, + "eval_runtime": 7.7418, + "eval_samples_per_second": 466.04, + "eval_steps_per_second": 3.746, + "step": 510 + }, + { + "epoch": 24.47, + "learning_rate": 1.942049768076337e-05, + "loss": 0.6921, + "step": 520 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.9171286031042128, + "eval_f1": 0.9113776927146895, + "eval_loss": 0.8102603554725647, + "eval_precision": 0.9110668991474122, + "eval_recall": 0.9171286031042128, + "eval_runtime": 7.7918, + "eval_samples_per_second": 463.054, + "eval_steps_per_second": 3.722, + "step": 531 + }, + { + "epoch": 25.08, + "learning_rate": 1.5234531758633342e-05, + "loss": 0.6894, + "step": 533 + }, + { + "epoch": 25.69, + "learning_rate": 1.1512529010821149e-05, + "loss": 0.6883, + "step": 546 + }, + { + "epoch": 25.98, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.914658851229938, + "eval_loss": 0.8015871047973633, + "eval_precision": 0.9124074698408369, + "eval_recall": 0.91990022172949, + "eval_runtime": 8.0276, + "eval_samples_per_second": 449.449, + "eval_steps_per_second": 3.613, + "step": 552 + }, + { + "epoch": 26.31, + "learning_rate": 8.27891611539504e-06, + "loss": 0.6878, + "step": 559 + }, + { + "epoch": 26.92, + "learning_rate": 5.554914556202672e-06, + "loss": 0.6849, + "step": 572 + }, + { + "epoch": 26.96, + "eval_accuracy": 0.9223946784922394, + "eval_f1": 0.9190892767305735, + "eval_loss": 0.8035950660705566, + "eval_precision": 0.9179518881638368, + "eval_recall": 0.9223946784922394, + "eval_runtime": 7.7448, + "eval_samples_per_second": 465.863, + "eval_steps_per_second": 3.744, + "step": 573 + }, + { + "epoch": 27.53, + "learning_rate": 3.3584013509821207e-06, + "loss": 0.6885, + "step": 585 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.917960088691796, + "eval_f1": 0.913326126929979, + "eval_loss": 0.808610737323761, + "eval_precision": 0.9125367058160145, + "eval_recall": 0.917960088691796, + "eval_runtime": 7.8846, + "eval_samples_per_second": 457.599, + "eval_steps_per_second": 3.678, + "step": 595 + }, + { + "epoch": 28.14, + "learning_rate": 1.7037917284615397e-06, + "loss": 0.6879, + "step": 598 + }, + { + "epoch": 28.75, + "learning_rate": 6.019445244119728e-07, + "loss": 0.6875, + "step": 611 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.9201773835920177, + "eval_f1": 0.9151969330986872, + "eval_loss": 0.7999213337898254, + "eval_precision": 0.912410898224107, + "eval_recall": 0.9201773835920177, + "eval_runtime": 8.1185, + "eval_samples_per_second": 444.417, + "eval_steps_per_second": 3.572, + "step": 616 + }, + { + "epoch": 29.36, + "learning_rate": 6.009091751794542e-08, + "loss": 0.6898, + "step": 624 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.91990022172949, + "eval_f1": 0.9149976627581135, + "eval_loss": 0.8063719868659973, + "eval_precision": 0.9145105323429977, + "eval_recall": 0.91990022172949, + "eval_runtime": 7.8197, + "eval_samples_per_second": 461.4, + "eval_steps_per_second": 3.709, + "step": 630 + } + ], + "logging_steps": 13, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "label_smoothing_factor": 0.012617254657911534, + "learning_rate": 0.00017186137276798086, + "metric": "eval/loss", + "weight_decay": 0.02524780914832015 + } +} diff --git a/run-xq4st66k/checkpoint-630/training_args.bin b/run-xq4st66k/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..19d2224b0447ecd25364dec9d9bcb9ede22ec326 --- /dev/null +++ b/run-xq4st66k/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8931ab304ae22cece74dfbb51142618a94356ca70787060897bbd65cc0ddf1d +size 4792 diff --git a/run-zcxpn54a/checkpoint-1232/model.safetensors b/run-zcxpn54a/checkpoint-1232/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c54909d3500d21c59853a3200ba8895684b2dfd --- /dev/null +++ b/run-zcxpn54a/checkpoint-1232/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f3a71d02f62f4f158b09f8a3ad03e9b51fa71c0609202948ceeaf4a080d9824 +size 198025308 diff --git a/run-zcxpn54a/checkpoint-1232/optimizer.pt b/run-zcxpn54a/checkpoint-1232/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..dcac652f8b46cdb1c4328479330a3a8fea3db8bc --- /dev/null +++ b/run-zcxpn54a/checkpoint-1232/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebae421aa66c3ac9ad7bf8782fe530a5b6e1d0e299a434e7033bc255e95d81c1 +size 395900602 diff --git a/run-zcxpn54a/checkpoint-1232/rng_state.pth b/run-zcxpn54a/checkpoint-1232/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e78fe7733f5b9bce5c6fbfa3a6145cc496d05245 --- /dev/null +++ b/run-zcxpn54a/checkpoint-1232/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20bd04985262159c8b072ab8016da869887f7af6ed0ffdedf1ca7c0cfc70216 +size 14244 diff --git a/run-zcxpn54a/checkpoint-1232/scheduler.pt b/run-zcxpn54a/checkpoint-1232/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..669f0229e9e91065aeb2f731654e2b98f0d65d34 --- /dev/null +++ b/run-zcxpn54a/checkpoint-1232/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86c9cb2f417e63bc8bfa634391ec412b9700d1461a7cdc1286d583d366e3d5a6 +size 1064 diff --git a/run-zcxpn54a/checkpoint-1232/trainer_state.json b/run-zcxpn54a/checkpoint-1232/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1125a7de6a2ab0e8e732b355e51647a656e95613 --- /dev/null +++ b/run-zcxpn54a/checkpoint-1232/trainer_state.json @@ -0,0 +1,570 @@ +{ + "best_metric": 0.9226718403547672, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-zcxpn54a/checkpoint-765", + "epoch": 28.988235294117647, + "eval_steps": 500, + "global_step": 1232, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 8.173346159738685e-07, + "loss": 1.531, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.5706762749445676, + "eval_loss": 1.4730000495910645, + "eval_runtime": 6.7305, + "eval_samples_per_second": 536.066, + "eval_steps_per_second": 8.469, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 1.634669231947737e-06, + "loss": 1.4834, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 2.4520038479216054e-06, + "loss": 1.3917, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8101441241685144, + "eval_loss": 1.2970589399337769, + "eval_runtime": 6.7613, + "eval_samples_per_second": 533.624, + "eval_steps_per_second": 8.43, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 3.269338463895474e-06, + "loss": 1.2637, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8284368070953437, + "eval_loss": 1.0452728271484375, + "eval_runtime": 6.6566, + "eval_samples_per_second": 542.015, + "eval_steps_per_second": 8.563, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 4.0866730798693426e-06, + "loss": 1.1094, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 4.904007695843211e-06, + "loss": 0.9888, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8317627494456763, + "eval_loss": 0.9423611164093018, + "eval_runtime": 6.6891, + "eval_samples_per_second": 539.386, + "eval_steps_per_second": 8.521, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 5.72134231181708e-06, + "loss": 0.9407, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 6.538676927790948e-06, + "loss": 0.9106, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8592017738359202, + "eval_loss": 0.889560341835022, + "eval_runtime": 6.5505, + "eval_samples_per_second": 550.8, + "eval_steps_per_second": 8.702, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 7.356011543764816e-06, + "loss": 0.8913, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8791574279379157, + "eval_loss": 0.8701406121253967, + "eval_runtime": 6.7477, + "eval_samples_per_second": 534.697, + "eval_steps_per_second": 8.447, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 8.173346159738685e-06, + "loss": 0.8677, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 8.990680775712553e-06, + "loss": 0.8517, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.897450110864745, + "eval_loss": 0.8402315974235535, + "eval_runtime": 6.5868, + "eval_samples_per_second": 547.762, + "eval_steps_per_second": 8.654, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 9.808015391686422e-06, + "loss": 0.8504, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 1.062535000766029e-05, + "loss": 0.8276, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8221293687820435, + "eval_runtime": 6.7738, + "eval_samples_per_second": 532.644, + "eval_steps_per_second": 8.415, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 1.144268462363416e-05, + "loss": 0.8144, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8159420490264893, + "eval_runtime": 6.5888, + "eval_samples_per_second": 547.597, + "eval_steps_per_second": 8.651, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 1.2260019239608028e-05, + "loss": 0.8176, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 1.2563642627929662e-05, + "loss": 0.8048, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9101995565410199, + "eval_loss": 0.8061383962631226, + "eval_runtime": 6.6323, + "eval_samples_per_second": 544.001, + "eval_steps_per_second": 8.594, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 1.2500524393026578e-05, + "loss": 0.7937, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.800520122051239, + "eval_runtime": 6.5773, + "eval_samples_per_second": 548.555, + "eval_steps_per_second": 8.666, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 1.2381398448910326e-05, + "loss": 0.7935, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 1.2207338610399441e-05, + "loss": 0.7901, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8041113018989563, + "eval_runtime": 6.7564, + "eval_samples_per_second": 534.014, + "eval_steps_per_second": 8.436, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 1.1979913872688812e-05, + "loss": 0.7881, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 1.1701174268248412e-05, + "loss": 0.7849, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.7979102730751038, + "eval_runtime": 6.8127, + "eval_samples_per_second": 529.597, + "eval_steps_per_second": 8.367, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 1.1373632387597304e-05, + "loss": 0.7784, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.7992376685142517, + "eval_runtime": 6.7479, + "eval_samples_per_second": 534.683, + "eval_steps_per_second": 8.447, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 1.100024073052672e-05, + "loss": 0.7885, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 1.0584365091930912e-05, + "loss": 0.7761, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.7992265224456787, + "eval_runtime": 6.7327, + "eval_samples_per_second": 535.896, + "eval_steps_per_second": 8.466, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 1.01297542221489e-05, + "loss": 0.781, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 9.64050603530225e-06, + "loss": 0.7689, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.8007858991622925, + "eval_runtime": 6.7787, + "eval_samples_per_second": 532.255, + "eval_steps_per_second": 8.409, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 9.121030670230776e-06, + "loss": 0.776, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.7990276217460632, + "eval_runtime": 6.5751, + "eval_samples_per_second": 548.735, + "eval_steps_per_second": 8.669, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 8.57601073699911e-06, + "loss": 0.7721, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 8.010359107316626e-06, + "loss": 0.7699, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9226718403547672, + "eval_loss": 0.7920404076576233, + "eval_runtime": 6.7319, + "eval_samples_per_second": 535.952, + "eval_steps_per_second": 8.467, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 7.4291746293526835e-06, + "loss": 0.763, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 6.83769616613893e-06, + "loss": 0.7668, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8005101680755615, + "eval_runtime": 6.8523, + "eval_samples_per_second": 526.54, + "eval_steps_per_second": 8.318, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 6.241255371861648e-06, + "loss": 0.7757, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9215631929046563, + "eval_loss": 0.7925511598587036, + "eval_runtime": 6.8107, + "eval_samples_per_second": 529.755, + "eval_steps_per_second": 8.369, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 5.645228631724122e-06, + "loss": 0.7652, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 5.054988598598487e-06, + "loss": 0.7594, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.7976831197738647, + "eval_runtime": 6.8159, + "eval_samples_per_second": 529.347, + "eval_steps_per_second": 8.363, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 4.475855763321266e-06, + "loss": 0.7625, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.7974503636360168, + "eval_runtime": 6.7271, + "eval_samples_per_second": 536.335, + "eval_steps_per_second": 8.473, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 3.913050495183416e-06, + "loss": 0.7634, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 3.3716459849273846e-06, + "loss": 0.7595, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.7927106618881226, + "eval_runtime": 6.76, + "eval_samples_per_second": 533.731, + "eval_steps_per_second": 8.432, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 2.8565225144283456e-06, + "loss": 0.7638, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 2.372323465277948e-06, + "loss": 0.7617, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.7929773926734924, + "eval_runtime": 6.658, + "eval_samples_per_second": 541.908, + "eval_steps_per_second": 8.561, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.9234134628141524e-06, + "loss": 0.7616, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.7986940145492554, + "eval_runtime": 6.708, + "eval_samples_per_second": 537.863, + "eval_steps_per_second": 8.497, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.5138390328916734e-06, + "loss": 0.7548, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 1.1472921260373756e-06, + "loss": 0.7591, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.7983978390693665, + "eval_runtime": 6.6189, + "eval_samples_per_second": 545.109, + "eval_steps_per_second": 8.612, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 8.270768377880337e-07, + "loss": 0.7592, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 5.560796251970973e-07, + "loss": 0.76, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.7961049675941467, + "eval_runtime": 6.8384, + "eval_samples_per_second": 527.612, + "eval_steps_per_second": 8.335, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 3.367432879822969e-07, + "loss": 0.7592, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.790411114692688, + "eval_runtime": 6.5864, + "eval_samples_per_second": 547.794, + "eval_steps_per_second": 8.654, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.710449488509648e-07, + "loss": 0.7542, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 6.047823149091707e-08, + "loss": 0.751, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.7922791838645935, + "eval_runtime": 6.9335, + "eval_samples_per_second": 520.372, + "eval_steps_per_second": 8.221, + "step": 1232 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 1.2574378707290284e-05, + "metric": "eval/loss", + "warmup_ratio": 0.31743684059205113 + } +} diff --git a/run-zcxpn54a/checkpoint-1232/training_args.bin b/run-zcxpn54a/checkpoint-1232/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..6fd235e9c43b1a9386610710bc68e64abb9f21e3 --- /dev/null +++ b/run-zcxpn54a/checkpoint-1232/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aa1f626ebe3a2431be8d9cfac9ce1a12b779a7fda37da809ab7f6252812f4dc +size 4792 diff --git a/run-zcxpn54a/checkpoint-1260/model.safetensors b/run-zcxpn54a/checkpoint-1260/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..921005fdbb71056c1ad4b1e58694c50a4bd2a8c4 --- /dev/null +++ b/run-zcxpn54a/checkpoint-1260/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f03ebd3b43e9fb6387eced038c4313d4119d642fefb50cc358ae83b5239a3578 +size 198025308 diff --git a/run-zcxpn54a/checkpoint-1260/optimizer.pt b/run-zcxpn54a/checkpoint-1260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f6bfdfb8df4872c1ef4225fd95b41a0d0f18eb0 --- /dev/null +++ b/run-zcxpn54a/checkpoint-1260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:123dfce6d1dc090baf86f35a9e001d6e1ddbcf38af8c867655549462edd4f3eb +size 395900602 diff --git a/run-zcxpn54a/checkpoint-1260/rng_state.pth b/run-zcxpn54a/checkpoint-1260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd2a335cf6e0e1e6cf58a1f13d3d4f57bd54ecae --- /dev/null +++ b/run-zcxpn54a/checkpoint-1260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deeb0d472f47497c93334cb3948d4032ad244442808af19b401a7a0fea1fe65c +size 14244 diff --git a/run-zcxpn54a/checkpoint-1260/scheduler.pt b/run-zcxpn54a/checkpoint-1260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb94351a61d3e7b1bae190f00657d5725d99ffa3 --- /dev/null +++ b/run-zcxpn54a/checkpoint-1260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21ffce990760ffae64ab7538389fb09d576680c8b5e2535c1f807a441269f72d +size 1064 diff --git a/run-zcxpn54a/checkpoint-1260/trainer_state.json b/run-zcxpn54a/checkpoint-1260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..22462429592a4b69ae43feb34392b5b5cefd1c37 --- /dev/null +++ b/run-zcxpn54a/checkpoint-1260/trainer_state.json @@ -0,0 +1,585 @@ +{ + "best_metric": 0.9235033259423503, + "best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-zcxpn54a/checkpoint-1260", + "epoch": 29.647058823529413, + "eval_steps": 500, + "global_step": 1260, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.61, + "learning_rate": 8.173346159738685e-07, + "loss": 1.531, + "step": 26 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.5706762749445676, + "eval_loss": 1.4730000495910645, + "eval_runtime": 6.7305, + "eval_samples_per_second": 536.066, + "eval_steps_per_second": 8.469, + "step": 42 + }, + { + "epoch": 1.22, + "learning_rate": 1.634669231947737e-06, + "loss": 1.4834, + "step": 52 + }, + { + "epoch": 1.84, + "learning_rate": 2.4520038479216054e-06, + "loss": 1.3917, + "step": 78 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8101441241685144, + "eval_loss": 1.2970589399337769, + "eval_runtime": 6.7613, + "eval_samples_per_second": 533.624, + "eval_steps_per_second": 8.43, + "step": 85 + }, + { + "epoch": 2.45, + "learning_rate": 3.269338463895474e-06, + "loss": 1.2637, + "step": 104 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.8284368070953437, + "eval_loss": 1.0452728271484375, + "eval_runtime": 6.6566, + "eval_samples_per_second": 542.015, + "eval_steps_per_second": 8.563, + "step": 127 + }, + { + "epoch": 3.06, + "learning_rate": 4.0866730798693426e-06, + "loss": 1.1094, + "step": 130 + }, + { + "epoch": 3.67, + "learning_rate": 4.904007695843211e-06, + "loss": 0.9888, + "step": 156 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8317627494456763, + "eval_loss": 0.9423611164093018, + "eval_runtime": 6.6891, + "eval_samples_per_second": 539.386, + "eval_steps_per_second": 8.521, + "step": 170 + }, + { + "epoch": 4.28, + "learning_rate": 5.72134231181708e-06, + "loss": 0.9407, + "step": 182 + }, + { + "epoch": 4.89, + "learning_rate": 6.538676927790948e-06, + "loss": 0.9106, + "step": 208 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.8592017738359202, + "eval_loss": 0.889560341835022, + "eval_runtime": 6.5505, + "eval_samples_per_second": 550.8, + "eval_steps_per_second": 8.702, + "step": 212 + }, + { + "epoch": 5.51, + "learning_rate": 7.356011543764816e-06, + "loss": 0.8913, + "step": 234 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.8791574279379157, + "eval_loss": 0.8701406121253967, + "eval_runtime": 6.7477, + "eval_samples_per_second": 534.697, + "eval_steps_per_second": 8.447, + "step": 255 + }, + { + "epoch": 6.12, + "learning_rate": 8.173346159738685e-06, + "loss": 0.8677, + "step": 260 + }, + { + "epoch": 6.73, + "learning_rate": 8.990680775712553e-06, + "loss": 0.8517, + "step": 286 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.897450110864745, + "eval_loss": 0.8402315974235535, + "eval_runtime": 6.5868, + "eval_samples_per_second": 547.762, + "eval_steps_per_second": 8.654, + "step": 297 + }, + { + "epoch": 7.34, + "learning_rate": 9.808015391686422e-06, + "loss": 0.8504, + "step": 312 + }, + { + "epoch": 7.95, + "learning_rate": 1.062535000766029e-05, + "loss": 0.8276, + "step": 338 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.9057649667405765, + "eval_loss": 0.8221293687820435, + "eval_runtime": 6.7738, + "eval_samples_per_second": 532.644, + "eval_steps_per_second": 8.415, + "step": 340 + }, + { + "epoch": 8.56, + "learning_rate": 1.144268462363416e-05, + "loss": 0.8144, + "step": 364 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.906319290465632, + "eval_loss": 0.8159420490264893, + "eval_runtime": 6.5888, + "eval_samples_per_second": 547.597, + "eval_steps_per_second": 8.651, + "step": 382 + }, + { + "epoch": 9.18, + "learning_rate": 1.2260019239608028e-05, + "loss": 0.8176, + "step": 390 + }, + { + "epoch": 9.79, + "learning_rate": 1.2563642627929662e-05, + "loss": 0.8048, + "step": 416 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9101995565410199, + "eval_loss": 0.8061383962631226, + "eval_runtime": 6.6323, + "eval_samples_per_second": 544.001, + "eval_steps_per_second": 8.594, + "step": 425 + }, + { + "epoch": 10.4, + "learning_rate": 1.2500524393026578e-05, + "loss": 0.7937, + "step": 442 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.800520122051239, + "eval_runtime": 6.5773, + "eval_samples_per_second": 548.555, + "eval_steps_per_second": 8.666, + "step": 467 + }, + { + "epoch": 11.01, + "learning_rate": 1.2381398448910326e-05, + "loss": 0.7935, + "step": 468 + }, + { + "epoch": 11.62, + "learning_rate": 1.2207338610399441e-05, + "loss": 0.7901, + "step": 494 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.9151884700665188, + "eval_loss": 0.8041113018989563, + "eval_runtime": 6.7564, + "eval_samples_per_second": 534.014, + "eval_steps_per_second": 8.436, + "step": 510 + }, + { + "epoch": 12.24, + "learning_rate": 1.1979913872688812e-05, + "loss": 0.7881, + "step": 520 + }, + { + "epoch": 12.85, + "learning_rate": 1.1701174268248412e-05, + "loss": 0.7849, + "step": 546 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.7979102730751038, + "eval_runtime": 6.8127, + "eval_samples_per_second": 529.597, + "eval_steps_per_second": 8.367, + "step": 552 + }, + { + "epoch": 13.46, + "learning_rate": 1.1373632387597304e-05, + "loss": 0.7784, + "step": 572 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.7992376685142517, + "eval_runtime": 6.7479, + "eval_samples_per_second": 534.683, + "eval_steps_per_second": 8.447, + "step": 595 + }, + { + "epoch": 14.07, + "learning_rate": 1.100024073052672e-05, + "loss": 0.7885, + "step": 598 + }, + { + "epoch": 14.68, + "learning_rate": 1.0584365091930912e-05, + "loss": 0.7761, + "step": 624 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.9138026607538803, + "eval_loss": 0.7992265224456787, + "eval_runtime": 6.7327, + "eval_samples_per_second": 535.896, + "eval_steps_per_second": 8.466, + "step": 637 + }, + { + "epoch": 15.29, + "learning_rate": 1.01297542221489e-05, + "loss": 0.781, + "step": 650 + }, + { + "epoch": 15.91, + "learning_rate": 9.64050603530225e-06, + "loss": 0.7689, + "step": 676 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.9121396895787139, + "eval_loss": 0.8007858991622925, + "eval_runtime": 6.7787, + "eval_samples_per_second": 532.255, + "eval_steps_per_second": 8.409, + "step": 680 + }, + { + "epoch": 16.52, + "learning_rate": 9.121030670230776e-06, + "loss": 0.776, + "step": 702 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.9154656319290465, + "eval_loss": 0.7990276217460632, + "eval_runtime": 6.5751, + "eval_samples_per_second": 548.735, + "eval_steps_per_second": 8.669, + "step": 722 + }, + { + "epoch": 17.13, + "learning_rate": 8.57601073699911e-06, + "loss": 0.7721, + "step": 728 + }, + { + "epoch": 17.74, + "learning_rate": 8.010359107316626e-06, + "loss": 0.7699, + "step": 754 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.9226718403547672, + "eval_loss": 0.7920404076576233, + "eval_runtime": 6.7319, + "eval_samples_per_second": 535.952, + "eval_steps_per_second": 8.467, + "step": 765 + }, + { + "epoch": 18.35, + "learning_rate": 7.4291746293526835e-06, + "loss": 0.763, + "step": 780 + }, + { + "epoch": 18.96, + "learning_rate": 6.83769616613893e-06, + "loss": 0.7668, + "step": 806 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.9176829268292683, + "eval_loss": 0.8005101680755615, + "eval_runtime": 6.8523, + "eval_samples_per_second": 526.54, + "eval_steps_per_second": 8.318, + "step": 807 + }, + { + "epoch": 19.58, + "learning_rate": 6.241255371861648e-06, + "loss": 0.7757, + "step": 832 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.9215631929046563, + "eval_loss": 0.7925511598587036, + "eval_runtime": 6.8107, + "eval_samples_per_second": 529.755, + "eval_steps_per_second": 8.369, + "step": 850 + }, + { + "epoch": 20.19, + "learning_rate": 5.645228631724122e-06, + "loss": 0.7652, + "step": 858 + }, + { + "epoch": 20.8, + "learning_rate": 5.054988598598487e-06, + "loss": 0.7594, + "step": 884 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.9149113082039911, + "eval_loss": 0.7976831197738647, + "eval_runtime": 6.8159, + "eval_samples_per_second": 529.347, + "eval_steps_per_second": 8.363, + "step": 892 + }, + { + "epoch": 21.41, + "learning_rate": 4.475855763321266e-06, + "loss": 0.7625, + "step": 910 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.7974503636360168, + "eval_runtime": 6.7271, + "eval_samples_per_second": 536.335, + "eval_steps_per_second": 8.473, + "step": 935 + }, + { + "epoch": 22.02, + "learning_rate": 3.913050495183416e-06, + "loss": 0.7634, + "step": 936 + }, + { + "epoch": 22.64, + "learning_rate": 3.3716459849273846e-06, + "loss": 0.7595, + "step": 962 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.9187915742793792, + "eval_loss": 0.7927106618881226, + "eval_runtime": 6.76, + "eval_samples_per_second": 533.731, + "eval_steps_per_second": 8.432, + "step": 977 + }, + { + "epoch": 23.25, + "learning_rate": 2.8565225144283456e-06, + "loss": 0.7638, + "step": 988 + }, + { + "epoch": 23.86, + "learning_rate": 2.372323465277948e-06, + "loss": 0.7617, + "step": 1014 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.7929773926734924, + "eval_runtime": 6.658, + "eval_samples_per_second": 541.908, + "eval_steps_per_second": 8.561, + "step": 1020 + }, + { + "epoch": 24.47, + "learning_rate": 1.9234134628141524e-06, + "loss": 0.7616, + "step": 1040 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.916019955654102, + "eval_loss": 0.7986940145492554, + "eval_runtime": 6.708, + "eval_samples_per_second": 537.863, + "eval_steps_per_second": 8.497, + "step": 1062 + }, + { + "epoch": 25.08, + "learning_rate": 1.5138390328916734e-06, + "loss": 0.7548, + "step": 1066 + }, + { + "epoch": 25.69, + "learning_rate": 1.1472921260373756e-06, + "loss": 0.7591, + "step": 1092 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.9171286031042128, + "eval_loss": 0.7983978390693665, + "eval_runtime": 6.6189, + "eval_samples_per_second": 545.109, + "eval_steps_per_second": 8.612, + "step": 1105 + }, + { + "epoch": 26.31, + "learning_rate": 8.270768377880337e-07, + "loss": 0.7592, + "step": 1118 + }, + { + "epoch": 26.92, + "learning_rate": 5.560796251970973e-07, + "loss": 0.76, + "step": 1144 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.9174057649667405, + "eval_loss": 0.7961049675941467, + "eval_runtime": 6.8384, + "eval_samples_per_second": 527.612, + "eval_steps_per_second": 8.335, + "step": 1147 + }, + { + "epoch": 27.53, + "learning_rate": 3.367432879822969e-07, + "loss": 0.7592, + "step": 1170 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.9212860310421286, + "eval_loss": 0.790411114692688, + "eval_runtime": 6.5864, + "eval_samples_per_second": 547.794, + "eval_steps_per_second": 8.654, + "step": 1190 + }, + { + "epoch": 28.14, + "learning_rate": 1.710449488509648e-07, + "loss": 0.7542, + "step": 1196 + }, + { + "epoch": 28.75, + "learning_rate": 6.047823149091707e-08, + "loss": 0.751, + "step": 1222 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.917960088691796, + "eval_loss": 0.7922791838645935, + "eval_runtime": 6.9335, + "eval_samples_per_second": 520.372, + "eval_steps_per_second": 8.221, + "step": 1232 + }, + { + "epoch": 29.36, + "learning_rate": 6.0397968765439525e-09, + "loss": 0.7603, + "step": 1248 + }, + { + "epoch": 29.65, + "eval_accuracy": 0.9235033259423503, + "eval_loss": 0.7906513810157776, + "eval_runtime": 6.9252, + "eval_samples_per_second": 520.999, + "eval_steps_per_second": 8.231, + "step": 1260 + } + ], + "logging_steps": 26, + "max_steps": 1260, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "_wandb": {}, + "assignments": {}, + "learning_rate": 1.2574378707290284e-05, + "metric": "eval/loss", + "warmup_ratio": 0.31743684059205113 + } +} diff --git a/run-zcxpn54a/checkpoint-1260/training_args.bin b/run-zcxpn54a/checkpoint-1260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..6fd235e9c43b1a9386610710bc68e64abb9f21e3 --- /dev/null +++ b/run-zcxpn54a/checkpoint-1260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aa1f626ebe3a2431be8d9cfac9ce1a12b779a7fda37da809ab7f6252812f4dc +size 4792 diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..69cf9041d6832e9e332d836b0e2fc0063dbd70d2 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:684366f4845287171fbcb75c71cb004c439f4d6f1138db58aebf8d62f593ca9b +size 4856