hoan's picture
Training in progress, epoch 1
7d31691 verified
{
"best_metric": 0.9273835920177383,
"best_model_checkpoint": "/home/hoan/projects/nsfw/models/efficientvit_l1.r224_in1k/run-2787ye3h/checkpoint-1147",
"epoch": 26.988235294117647,
"eval_steps": 500,
"global_step": 1147,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.61,
"learning_rate": 3.626442337305279e-05,
"loss": 1.293,
"step": 26
},
{
"epoch": 0.99,
"eval_accuracy": 0.8755543237250555,
"eval_loss": 0.9349600672721863,
"eval_runtime": 6.9886,
"eval_samples_per_second": 516.266,
"eval_steps_per_second": 8.156,
"step": 42
},
{
"epoch": 1.22,
"learning_rate": 7.252884674610559e-05,
"loss": 0.9064,
"step": 52
},
{
"epoch": 1.84,
"learning_rate": 0.00010879327011915836,
"loss": 0.8316,
"step": 78
},
{
"epoch": 2.0,
"eval_accuracy": 0.914079822616408,
"eval_loss": 0.8049944639205933,
"eval_runtime": 6.9141,
"eval_samples_per_second": 521.829,
"eval_steps_per_second": 8.244,
"step": 85
},
{
"epoch": 2.45,
"learning_rate": 0.00014505769349221117,
"loss": 0.7997,
"step": 104
},
{
"epoch": 2.99,
"eval_accuracy": 0.9126940133037694,
"eval_loss": 0.8133141398429871,
"eval_runtime": 6.6693,
"eval_samples_per_second": 540.987,
"eval_steps_per_second": 8.547,
"step": 127
},
{
"epoch": 3.06,
"learning_rate": 0.00018132211686526395,
"loss": 0.7971,
"step": 130
},
{
"epoch": 3.67,
"learning_rate": 0.00018108536618532123,
"loss": 0.7806,
"step": 156
},
{
"epoch": 4.0,
"eval_accuracy": 0.9182372505543237,
"eval_loss": 0.8043554425239563,
"eval_runtime": 6.9887,
"eval_samples_per_second": 516.26,
"eval_steps_per_second": 8.156,
"step": 170
},
{
"epoch": 4.28,
"learning_rate": 0.00018037635063854584,
"loss": 0.7788,
"step": 182
},
{
"epoch": 4.89,
"learning_rate": 0.00017919877324618395,
"loss": 0.764,
"step": 208
},
{
"epoch": 4.99,
"eval_accuracy": 0.917960088691796,
"eval_loss": 0.7976019382476807,
"eval_runtime": 6.7358,
"eval_samples_per_second": 535.642,
"eval_steps_per_second": 8.462,
"step": 212
},
{
"epoch": 5.51,
"learning_rate": 0.00017755878421766717,
"loss": 0.7658,
"step": 234
},
{
"epoch": 6.0,
"eval_accuracy": 0.9016075388026608,
"eval_loss": 0.8304137587547302,
"eval_runtime": 6.8918,
"eval_samples_per_second": 523.522,
"eval_steps_per_second": 8.271,
"step": 255
},
{
"epoch": 6.12,
"learning_rate": 0.00017546494882951695,
"loss": 0.751,
"step": 260
},
{
"epoch": 6.73,
"learning_rate": 0.00017292820269092213,
"loss": 0.7533,
"step": 286
},
{
"epoch": 6.99,
"eval_accuracy": 0.9049334811529933,
"eval_loss": 0.8278865218162537,
"eval_runtime": 6.9038,
"eval_samples_per_second": 522.607,
"eval_steps_per_second": 8.256,
"step": 297
},
{
"epoch": 7.34,
"learning_rate": 0.00016996179462962718,
"loss": 0.747,
"step": 312
},
{
"epoch": 7.95,
"learning_rate": 0.00016658121749642477,
"loss": 0.7294,
"step": 338
},
{
"epoch": 8.0,
"eval_accuracy": 0.9054878048780488,
"eval_loss": 0.8298162817955017,
"eval_runtime": 6.8854,
"eval_samples_per_second": 524.008,
"eval_steps_per_second": 8.278,
"step": 340
},
{
"epoch": 8.56,
"learning_rate": 0.0001628041272496441,
"loss": 0.7325,
"step": 364
},
{
"epoch": 8.99,
"eval_accuracy": 0.9174057649667405,
"eval_loss": 0.8046008944511414,
"eval_runtime": 6.8095,
"eval_samples_per_second": 529.85,
"eval_steps_per_second": 8.371,
"step": 382
},
{
"epoch": 9.18,
"learning_rate": 0.0001586502507422381,
"loss": 0.737,
"step": 390
},
{
"epoch": 9.79,
"learning_rate": 0.0001541412826930757,
"loss": 0.7235,
"step": 416
},
{
"epoch": 10.0,
"eval_accuracy": 0.917960088691796,
"eval_loss": 0.8011636734008789,
"eval_runtime": 7.0572,
"eval_samples_per_second": 511.252,
"eval_steps_per_second": 8.077,
"step": 425
},
{
"epoch": 10.4,
"learning_rate": 0.00014930077238053366,
"loss": 0.7212,
"step": 442
},
{
"epoch": 10.99,
"eval_accuracy": 0.9154656319290465,
"eval_loss": 0.8091404438018799,
"eval_runtime": 6.6144,
"eval_samples_per_second": 545.478,
"eval_steps_per_second": 8.618,
"step": 467
},
{
"epoch": 11.01,
"learning_rate": 0.00014415400065016071,
"loss": 0.7204,
"step": 468
},
{
"epoch": 11.62,
"learning_rate": 0.00013872784787877452,
"loss": 0.7154,
"step": 494
},
{
"epoch": 12.0,
"eval_accuracy": 0.8747228381374723,
"eval_loss": 0.8664525747299194,
"eval_runtime": 6.7378,
"eval_samples_per_second": 535.484,
"eval_steps_per_second": 8.46,
"step": 510
},
{
"epoch": 12.24,
"learning_rate": 0.00013305065358458295,
"loss": 0.7148,
"step": 520
},
{
"epoch": 12.85,
"learning_rate": 0.00012715206841655379,
"loss": 0.7122,
"step": 546
},
{
"epoch": 12.99,
"eval_accuracy": 0.9154656319290465,
"eval_loss": 0.8063619136810303,
"eval_runtime": 6.798,
"eval_samples_per_second": 530.746,
"eval_steps_per_second": 8.385,
"step": 552
},
{
"epoch": 13.46,
"learning_rate": 0.00012106289929605711,
"loss": 0.7096,
"step": 572
},
{
"epoch": 14.0,
"eval_accuracy": 0.9162971175166297,
"eval_loss": 0.8059455752372742,
"eval_runtime": 6.7495,
"eval_samples_per_second": 534.558,
"eval_steps_per_second": 8.445,
"step": 595
},
{
"epoch": 14.07,
"learning_rate": 0.00011481494851956916,
"loss": 0.7157,
"step": 598
},
{
"epoch": 14.68,
"learning_rate": 0.0001084408476627666,
"loss": 0.7021,
"step": 624
},
{
"epoch": 14.99,
"eval_accuracy": 0.9212860310421286,
"eval_loss": 0.8017489314079285,
"eval_runtime": 6.9058,
"eval_samples_per_second": 522.457,
"eval_steps_per_second": 8.254,
"step": 637
},
{
"epoch": 15.29,
"learning_rate": 0.0001019738871534904,
"loss": 0.7021,
"step": 650
},
{
"epoch": 15.91,
"learning_rate": 9.544784240368045e-05,
"loss": 0.7002,
"step": 676
},
{
"epoch": 16.0,
"eval_accuracy": 0.9157427937915743,
"eval_loss": 0.816520631313324,
"eval_runtime": 6.8,
"eval_samples_per_second": 530.585,
"eval_steps_per_second": 8.382,
"step": 680
},
{
"epoch": 16.52,
"learning_rate": 8.889679740835216e-05,
"loss": 0.705,
"step": 702
},
{
"epoch": 16.99,
"eval_accuracy": 0.9196230598669624,
"eval_loss": 0.8022974133491516,
"eval_runtime": 6.9057,
"eval_samples_per_second": 522.468,
"eval_steps_per_second": 8.254,
"step": 722
},
{
"epoch": 17.13,
"learning_rate": 8.235496673291634e-05,
"loss": 0.7031,
"step": 728
},
{
"epoch": 17.74,
"learning_rate": 7.585651681856075e-05,
"loss": 0.6987,
"step": 754
},
{
"epoch": 18.0,
"eval_accuracy": 0.9154656319290465,
"eval_loss": 0.8067501783370972,
"eval_runtime": 6.7889,
"eval_samples_per_second": 531.457,
"eval_steps_per_second": 8.396,
"step": 765
},
{
"epoch": 18.35,
"learning_rate": 6.943538753897311e-05,
"loss": 0.6986,
"step": 780
},
{
"epoch": 18.96,
"learning_rate": 6.312511494037267e-05,
"loss": 0.698,
"step": 806
},
{
"epoch": 18.99,
"eval_accuracy": 0.9262749445676275,
"eval_loss": 0.7983365058898926,
"eval_runtime": 6.7297,
"eval_samples_per_second": 536.129,
"eval_steps_per_second": 8.47,
"step": 807
},
{
"epoch": 19.58,
"learning_rate": 5.695865609063774e-05,
"loss": 0.6899,
"step": 832
},
{
"epoch": 20.0,
"eval_accuracy": 0.9174057649667405,
"eval_loss": 0.8008666038513184,
"eval_runtime": 6.6376,
"eval_samples_per_second": 543.571,
"eval_steps_per_second": 8.587,
"step": 850
},
{
"epoch": 20.19,
"learning_rate": 5.0968216952300726e-05,
"loss": 0.6935,
"step": 858
},
{
"epoch": 20.8,
"learning_rate": 4.518508417838943e-05,
"loss": 0.6922,
"step": 884
},
{
"epoch": 20.99,
"eval_accuracy": 0.9221175166297118,
"eval_loss": 0.8021017909049988,
"eval_runtime": 6.9244,
"eval_samples_per_second": 521.053,
"eval_steps_per_second": 8.232,
"step": 892
},
{
"epoch": 21.41,
"learning_rate": 3.9639461709605394e-05,
"loss": 0.6895,
"step": 910
},
{
"epoch": 22.0,
"eval_accuracy": 0.9240576496674058,
"eval_loss": 0.7964810729026794,
"eval_runtime": 6.7682,
"eval_samples_per_second": 533.077,
"eval_steps_per_second": 8.422,
"step": 935
},
{
"epoch": 22.02,
"learning_rate": 3.436031302625408e-05,
"loss": 0.6875,
"step": 936
},
{
"epoch": 22.64,
"learning_rate": 2.937520987880715e-05,
"loss": 0.6865,
"step": 962
},
{
"epoch": 22.99,
"eval_accuracy": 0.9229490022172949,
"eval_loss": 0.7992194294929504,
"eval_runtime": 6.8278,
"eval_samples_per_second": 528.427,
"eval_steps_per_second": 8.348,
"step": 977
},
{
"epoch": 23.25,
"learning_rate": 2.4710188287142336e-05,
"loss": 0.6886,
"step": 988
},
{
"epoch": 23.86,
"learning_rate": 2.0389612560542293e-05,
"loss": 0.6874,
"step": 1014
},
{
"epoch": 24.0,
"eval_accuracy": 0.9243348115299335,
"eval_loss": 0.794614851474762,
"eval_runtime": 7.0369,
"eval_samples_per_second": 512.726,
"eval_steps_per_second": 8.1,
"step": 1020
},
{
"epoch": 24.47,
"learning_rate": 1.6436048048644487e-05,
"loss": 0.6936,
"step": 1040
},
{
"epoch": 24.99,
"eval_accuracy": 0.924889135254989,
"eval_loss": 0.7939379811286926,
"eval_runtime": 6.7889,
"eval_samples_per_second": 531.455,
"eval_steps_per_second": 8.396,
"step": 1062
},
{
"epoch": 25.08,
"learning_rate": 1.2870143287933509e-05,
"loss": 0.6864,
"step": 1066
},
{
"epoch": 25.69,
"learning_rate": 9.710522159297248e-06,
"loss": 0.6875,
"step": 1092
},
{
"epoch": 26.0,
"eval_accuracy": 0.926829268292683,
"eval_loss": 0.7930501699447632,
"eval_runtime": 6.6519,
"eval_samples_per_second": 542.402,
"eval_steps_per_second": 8.569,
"step": 1105
},
{
"epoch": 26.31,
"learning_rate": 6.9736866198824056e-06,
"loss": 0.6869,
"step": 1118
},
{
"epoch": 26.92,
"learning_rate": 4.67393051725817e-06,
"loss": 0.6863,
"step": 1144
},
{
"epoch": 26.99,
"eval_accuracy": 0.9273835920177383,
"eval_loss": 0.7957404851913452,
"eval_runtime": 6.6539,
"eval_samples_per_second": 542.236,
"eval_steps_per_second": 8.566,
"step": 1147
}
],
"logging_steps": 26,
"max_steps": 1260,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": {
"_wandb": {},
"assignments": {},
"learning_rate": 0.00018132211686526395,
"metric": "eval/loss",
"warmup_ratio": 0.1030284831764748
}
}