distillBEiT / trainer_state.json
sylvain471's picture
Upload 8 files
2e58e96 verified
{
"best_metric": 0.8571428571428571,
"best_model_checkpoint": "distillBEiT/checkpoints/checkpoint-1953",
"epoch": 31.0,
"eval_steps": 500,
"global_step": 1953,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 4.9126984126984125e-05,
"loss": 6.699,
"step": 63
},
{
"epoch": 1.0,
"eval_accuracy": 0.14285714285714285,
"eval_loss": 11.804450035095215,
"eval_runtime": 2.7027,
"eval_samples_per_second": 20.72,
"eval_steps_per_second": 2.59,
"step": 63
},
{
"epoch": 2.0,
"learning_rate": 4.812698412698413e-05,
"loss": 4.176,
"step": 126
},
{
"epoch": 2.0,
"eval_accuracy": 0.44642857142857145,
"eval_loss": 8.514544486999512,
"eval_runtime": 2.7591,
"eval_samples_per_second": 20.297,
"eval_steps_per_second": 2.537,
"step": 126
},
{
"epoch": 3.0,
"learning_rate": 4.712698412698413e-05,
"loss": 2.6846,
"step": 189
},
{
"epoch": 3.0,
"eval_accuracy": 0.125,
"eval_loss": 26.979318618774414,
"eval_runtime": 2.7027,
"eval_samples_per_second": 20.72,
"eval_steps_per_second": 2.59,
"step": 189
},
{
"epoch": 4.0,
"learning_rate": 4.612698412698413e-05,
"loss": 2.2263,
"step": 252
},
{
"epoch": 4.0,
"eval_accuracy": 0.08928571428571429,
"eval_loss": 33.445133209228516,
"eval_runtime": 2.6746,
"eval_samples_per_second": 20.937,
"eval_steps_per_second": 2.617,
"step": 252
},
{
"epoch": 5.0,
"learning_rate": 4.512698412698413e-05,
"loss": 1.8895,
"step": 315
},
{
"epoch": 5.0,
"eval_accuracy": 0.6428571428571429,
"eval_loss": 8.057156562805176,
"eval_runtime": 2.7714,
"eval_samples_per_second": 20.207,
"eval_steps_per_second": 2.526,
"step": 315
},
{
"epoch": 6.0,
"learning_rate": 4.4126984126984126e-05,
"loss": 1.525,
"step": 378
},
{
"epoch": 6.0,
"eval_accuracy": 0.4107142857142857,
"eval_loss": 8.990975379943848,
"eval_runtime": 2.745,
"eval_samples_per_second": 20.4,
"eval_steps_per_second": 2.55,
"step": 378
},
{
"epoch": 7.0,
"learning_rate": 4.312698412698413e-05,
"loss": 1.4566,
"step": 441
},
{
"epoch": 7.0,
"eval_accuracy": 0.5714285714285714,
"eval_loss": 6.924566745758057,
"eval_runtime": 2.7577,
"eval_samples_per_second": 20.307,
"eval_steps_per_second": 2.538,
"step": 441
},
{
"epoch": 8.0,
"learning_rate": 4.212698412698413e-05,
"loss": 1.3691,
"step": 504
},
{
"epoch": 8.0,
"eval_accuracy": 0.6428571428571429,
"eval_loss": 11.207305908203125,
"eval_runtime": 2.6969,
"eval_samples_per_second": 20.764,
"eval_steps_per_second": 2.596,
"step": 504
},
{
"epoch": 9.0,
"learning_rate": 4.112698412698413e-05,
"loss": 1.168,
"step": 567
},
{
"epoch": 9.0,
"eval_accuracy": 0.7142857142857143,
"eval_loss": 6.766927242279053,
"eval_runtime": 2.8201,
"eval_samples_per_second": 19.857,
"eval_steps_per_second": 2.482,
"step": 567
},
{
"epoch": 10.0,
"learning_rate": 4.012698412698413e-05,
"loss": 1.1166,
"step": 630
},
{
"epoch": 10.0,
"eval_accuracy": 0.7857142857142857,
"eval_loss": 4.691386699676514,
"eval_runtime": 2.7695,
"eval_samples_per_second": 20.22,
"eval_steps_per_second": 2.528,
"step": 630
},
{
"epoch": 11.0,
"learning_rate": 3.9126984126984126e-05,
"loss": 1.0649,
"step": 693
},
{
"epoch": 11.0,
"eval_accuracy": 0.7678571428571429,
"eval_loss": 5.042652606964111,
"eval_runtime": 2.6881,
"eval_samples_per_second": 20.833,
"eval_steps_per_second": 2.604,
"step": 693
},
{
"epoch": 12.0,
"learning_rate": 3.812698412698413e-05,
"loss": 0.97,
"step": 756
},
{
"epoch": 12.0,
"eval_accuracy": 0.7142857142857143,
"eval_loss": 6.7766571044921875,
"eval_runtime": 2.6425,
"eval_samples_per_second": 21.192,
"eval_steps_per_second": 2.649,
"step": 756
},
{
"epoch": 13.0,
"learning_rate": 3.7126984126984125e-05,
"loss": 1.0896,
"step": 819
},
{
"epoch": 13.0,
"eval_accuracy": 0.8035714285714286,
"eval_loss": 11.280374526977539,
"eval_runtime": 2.7964,
"eval_samples_per_second": 20.026,
"eval_steps_per_second": 2.503,
"step": 819
},
{
"epoch": 14.0,
"learning_rate": 3.6126984126984135e-05,
"loss": 0.9401,
"step": 882
},
{
"epoch": 14.0,
"eval_accuracy": 0.6607142857142857,
"eval_loss": 11.328347206115723,
"eval_runtime": 2.8029,
"eval_samples_per_second": 19.979,
"eval_steps_per_second": 2.497,
"step": 882
},
{
"epoch": 15.0,
"learning_rate": 3.512698412698413e-05,
"loss": 0.9331,
"step": 945
},
{
"epoch": 15.0,
"eval_accuracy": 0.6964285714285714,
"eval_loss": 12.213973999023438,
"eval_runtime": 2.6558,
"eval_samples_per_second": 21.086,
"eval_steps_per_second": 2.636,
"step": 945
},
{
"epoch": 16.0,
"learning_rate": 3.412698412698413e-05,
"loss": 0.8237,
"step": 1008
},
{
"epoch": 16.0,
"eval_accuracy": 0.7321428571428571,
"eval_loss": 7.346871852874756,
"eval_runtime": 2.7127,
"eval_samples_per_second": 20.644,
"eval_steps_per_second": 2.58,
"step": 1008
},
{
"epoch": 17.0,
"learning_rate": 3.312698412698413e-05,
"loss": 0.7515,
"step": 1071
},
{
"epoch": 17.0,
"eval_accuracy": 0.7857142857142857,
"eval_loss": 4.754016399383545,
"eval_runtime": 2.7003,
"eval_samples_per_second": 20.738,
"eval_steps_per_second": 2.592,
"step": 1071
},
{
"epoch": 18.0,
"learning_rate": 3.2126984126984126e-05,
"loss": 0.7622,
"step": 1134
},
{
"epoch": 18.0,
"eval_accuracy": 0.7857142857142857,
"eval_loss": 4.091554641723633,
"eval_runtime": 2.7272,
"eval_samples_per_second": 20.534,
"eval_steps_per_second": 2.567,
"step": 1134
},
{
"epoch": 19.0,
"learning_rate": 3.112698412698413e-05,
"loss": 0.853,
"step": 1197
},
{
"epoch": 19.0,
"eval_accuracy": 0.16071428571428573,
"eval_loss": 12.633813858032227,
"eval_runtime": 2.6777,
"eval_samples_per_second": 20.914,
"eval_steps_per_second": 2.614,
"step": 1197
},
{
"epoch": 20.0,
"learning_rate": 3.0126984126984124e-05,
"loss": 0.6455,
"step": 1260
},
{
"epoch": 20.0,
"eval_accuracy": 0.7678571428571429,
"eval_loss": 9.073665618896484,
"eval_runtime": 2.7224,
"eval_samples_per_second": 20.57,
"eval_steps_per_second": 2.571,
"step": 1260
},
{
"epoch": 21.0,
"learning_rate": 2.912698412698413e-05,
"loss": 0.6667,
"step": 1323
},
{
"epoch": 21.0,
"eval_accuracy": 0.7321428571428571,
"eval_loss": 4.806458950042725,
"eval_runtime": 2.6589,
"eval_samples_per_second": 21.062,
"eval_steps_per_second": 2.633,
"step": 1323
},
{
"epoch": 22.0,
"learning_rate": 2.812698412698413e-05,
"loss": 0.689,
"step": 1386
},
{
"epoch": 22.0,
"eval_accuracy": 0.48214285714285715,
"eval_loss": 9.992805480957031,
"eval_runtime": 2.6988,
"eval_samples_per_second": 20.75,
"eval_steps_per_second": 2.594,
"step": 1386
},
{
"epoch": 23.0,
"learning_rate": 2.712698412698413e-05,
"loss": 0.6914,
"step": 1449
},
{
"epoch": 23.0,
"eval_accuracy": 0.5357142857142857,
"eval_loss": 18.975879669189453,
"eval_runtime": 2.6271,
"eval_samples_per_second": 21.316,
"eval_steps_per_second": 2.665,
"step": 1449
},
{
"epoch": 24.0,
"learning_rate": 2.612698412698413e-05,
"loss": 0.677,
"step": 1512
},
{
"epoch": 24.0,
"eval_accuracy": 0.125,
"eval_loss": 15.608841896057129,
"eval_runtime": 2.674,
"eval_samples_per_second": 20.942,
"eval_steps_per_second": 2.618,
"step": 1512
},
{
"epoch": 25.0,
"learning_rate": 2.5126984126984128e-05,
"loss": 0.6575,
"step": 1575
},
{
"epoch": 25.0,
"eval_accuracy": 0.625,
"eval_loss": 13.633025169372559,
"eval_runtime": 2.6744,
"eval_samples_per_second": 20.94,
"eval_steps_per_second": 2.617,
"step": 1575
},
{
"epoch": 26.0,
"learning_rate": 2.4126984126984128e-05,
"loss": 0.5564,
"step": 1638
},
{
"epoch": 26.0,
"eval_accuracy": 0.375,
"eval_loss": 9.142752647399902,
"eval_runtime": 2.7086,
"eval_samples_per_second": 20.675,
"eval_steps_per_second": 2.584,
"step": 1638
},
{
"epoch": 27.0,
"learning_rate": 2.3126984126984127e-05,
"loss": 0.7167,
"step": 1701
},
{
"epoch": 27.0,
"eval_accuracy": 0.75,
"eval_loss": 12.48905086517334,
"eval_runtime": 2.6896,
"eval_samples_per_second": 20.821,
"eval_steps_per_second": 2.603,
"step": 1701
},
{
"epoch": 28.0,
"learning_rate": 2.212698412698413e-05,
"loss": 0.5167,
"step": 1764
},
{
"epoch": 28.0,
"eval_accuracy": 0.7857142857142857,
"eval_loss": 6.086201190948486,
"eval_runtime": 2.6432,
"eval_samples_per_second": 21.186,
"eval_steps_per_second": 2.648,
"step": 1764
},
{
"epoch": 29.0,
"learning_rate": 2.112698412698413e-05,
"loss": 0.5433,
"step": 1827
},
{
"epoch": 29.0,
"eval_accuracy": 0.375,
"eval_loss": 10.653279304504395,
"eval_runtime": 2.7065,
"eval_samples_per_second": 20.691,
"eval_steps_per_second": 2.586,
"step": 1827
},
{
"epoch": 30.0,
"learning_rate": 2.012698412698413e-05,
"loss": 0.4732,
"step": 1890
},
{
"epoch": 30.0,
"eval_accuracy": 0.7321428571428571,
"eval_loss": 5.170372009277344,
"eval_runtime": 2.6871,
"eval_samples_per_second": 20.841,
"eval_steps_per_second": 2.605,
"step": 1890
},
{
"epoch": 31.0,
"learning_rate": 1.9126984126984128e-05,
"loss": 0.4945,
"step": 1953
},
{
"epoch": 31.0,
"eval_accuracy": 0.8571428571428571,
"eval_loss": 3.8952624797821045,
"eval_runtime": 2.7235,
"eval_samples_per_second": 20.562,
"eval_steps_per_second": 2.57,
"step": 1953
}
],
"logging_steps": 500,
"max_steps": 3150,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 3.126627671021568e+16,
"trial_name": null,
"trial_params": null
}