hf-distilbert-imdb-mlm / trainer_state.json
Edison
Model save
908c75e
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"global_step": 7660,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 1.9005221932114882e-05,
"loss": 2.5049,
"step": 383
},
{
"epoch": 1.0,
"eval_loss": 2.3055639266967773,
"eval_runtime": 6.5178,
"eval_samples_per_second": 940.197,
"eval_steps_per_second": 14.729,
"step": 383
},
{
"epoch": 2.0,
"learning_rate": 1.8005221932114885e-05,
"loss": 2.3896,
"step": 766
},
{
"epoch": 2.0,
"eval_loss": 2.246039390563965,
"eval_runtime": 6.5445,
"eval_samples_per_second": 936.352,
"eval_steps_per_second": 14.669,
"step": 766
},
{
"epoch": 3.0,
"learning_rate": 1.7005221932114885e-05,
"loss": 2.3458,
"step": 1149
},
{
"epoch": 3.0,
"eval_loss": 2.2351295948028564,
"eval_runtime": 6.5331,
"eval_samples_per_second": 937.992,
"eval_steps_per_second": 14.694,
"step": 1149
},
{
"epoch": 4.0,
"learning_rate": 1.6005221932114884e-05,
"loss": 2.3097,
"step": 1532
},
{
"epoch": 4.0,
"eval_loss": 2.1917026042938232,
"eval_runtime": 6.5235,
"eval_samples_per_second": 939.379,
"eval_steps_per_second": 14.716,
"step": 1532
},
{
"epoch": 5.0,
"learning_rate": 1.5005221932114883e-05,
"loss": 2.2839,
"step": 1915
},
{
"epoch": 5.0,
"eval_loss": 2.193546772003174,
"eval_runtime": 6.5261,
"eval_samples_per_second": 938.992,
"eval_steps_per_second": 14.71,
"step": 1915
},
{
"epoch": 6.0,
"learning_rate": 1.4005221932114883e-05,
"loss": 2.2611,
"step": 2298
},
{
"epoch": 6.0,
"eval_loss": 2.174062490463257,
"eval_runtime": 6.5308,
"eval_samples_per_second": 938.316,
"eval_steps_per_second": 14.699,
"step": 2298
},
{
"epoch": 7.0,
"learning_rate": 1.3005221932114884e-05,
"loss": 2.2397,
"step": 2681
},
{
"epoch": 7.0,
"eval_loss": 2.151566743850708,
"eval_runtime": 6.525,
"eval_samples_per_second": 939.154,
"eval_steps_per_second": 14.713,
"step": 2681
},
{
"epoch": 8.0,
"learning_rate": 1.2005221932114883e-05,
"loss": 2.2234,
"step": 3064
},
{
"epoch": 8.0,
"eval_loss": 2.14640474319458,
"eval_runtime": 6.531,
"eval_samples_per_second": 938.292,
"eval_steps_per_second": 14.699,
"step": 3064
},
{
"epoch": 9.0,
"learning_rate": 1.1005221932114883e-05,
"loss": 2.2121,
"step": 3447
},
{
"epoch": 9.0,
"eval_loss": 2.124241590499878,
"eval_runtime": 6.5412,
"eval_samples_per_second": 936.826,
"eval_steps_per_second": 14.676,
"step": 3447
},
{
"epoch": 10.0,
"learning_rate": 1.0005221932114884e-05,
"loss": 2.2041,
"step": 3830
},
{
"epoch": 10.0,
"eval_loss": 2.1360511779785156,
"eval_runtime": 6.5352,
"eval_samples_per_second": 937.687,
"eval_steps_per_second": 14.69,
"step": 3830
},
{
"epoch": 11.0,
"learning_rate": 9.005221932114883e-06,
"loss": 2.1883,
"step": 4213
},
{
"epoch": 11.0,
"eval_loss": 2.1251063346862793,
"eval_runtime": 6.5334,
"eval_samples_per_second": 937.956,
"eval_steps_per_second": 14.694,
"step": 4213
},
{
"epoch": 12.0,
"learning_rate": 8.005221932114883e-06,
"loss": 2.185,
"step": 4596
},
{
"epoch": 12.0,
"eval_loss": 2.1296956539154053,
"eval_runtime": 6.5234,
"eval_samples_per_second": 939.386,
"eval_steps_per_second": 14.716,
"step": 4596
},
{
"epoch": 13.0,
"learning_rate": 7.005221932114883e-06,
"loss": 2.1712,
"step": 4979
},
{
"epoch": 13.0,
"eval_loss": 2.1061811447143555,
"eval_runtime": 6.5182,
"eval_samples_per_second": 940.138,
"eval_steps_per_second": 14.728,
"step": 4979
},
{
"epoch": 14.0,
"learning_rate": 6.005221932114883e-06,
"loss": 2.1648,
"step": 5362
},
{
"epoch": 14.0,
"eval_loss": 2.1048877239227295,
"eval_runtime": 6.5157,
"eval_samples_per_second": 940.496,
"eval_steps_per_second": 14.734,
"step": 5362
},
{
"epoch": 15.0,
"learning_rate": 5.005221932114883e-06,
"loss": 2.1587,
"step": 5745
},
{
"epoch": 15.0,
"eval_loss": 2.106553792953491,
"eval_runtime": 6.52,
"eval_samples_per_second": 939.88,
"eval_steps_per_second": 14.724,
"step": 5745
},
{
"epoch": 16.0,
"learning_rate": 4.005221932114883e-06,
"loss": 2.1532,
"step": 6128
},
{
"epoch": 16.0,
"eval_loss": 2.0981085300445557,
"eval_runtime": 6.5377,
"eval_samples_per_second": 937.338,
"eval_steps_per_second": 14.684,
"step": 6128
},
{
"epoch": 17.0,
"learning_rate": 3.005221932114883e-06,
"loss": 2.1472,
"step": 6511
},
{
"epoch": 17.0,
"eval_loss": 2.0925848484039307,
"eval_runtime": 6.5057,
"eval_samples_per_second": 941.95,
"eval_steps_per_second": 14.756,
"step": 6511
},
{
"epoch": 18.0,
"learning_rate": 2.005221932114883e-06,
"loss": 2.1462,
"step": 6894
},
{
"epoch": 18.0,
"eval_loss": 2.083235025405884,
"eval_runtime": 6.5322,
"eval_samples_per_second": 938.118,
"eval_steps_per_second": 14.696,
"step": 6894
},
{
"epoch": 19.0,
"learning_rate": 1.0052219321148825e-06,
"loss": 2.1437,
"step": 7277
},
{
"epoch": 19.0,
"eval_loss": 2.093729257583618,
"eval_runtime": 6.5339,
"eval_samples_per_second": 937.883,
"eval_steps_per_second": 14.693,
"step": 7277
},
{
"epoch": 20.0,
"learning_rate": 5.2219321148825064e-09,
"loss": 2.1386,
"step": 7660
},
{
"epoch": 20.0,
"eval_loss": 2.0927391052246094,
"eval_runtime": 6.5139,
"eval_samples_per_second": 940.762,
"eval_steps_per_second": 14.738,
"step": 7660
},
{
"epoch": 20.0,
"step": 7660,
"total_flos": 3.24907393591296e+16,
"train_loss": 2.228582644151334,
"train_runtime": 1800.6594,
"train_samples_per_second": 272.234,
"train_steps_per_second": 4.254
}
],
"max_steps": 7660,
"num_train_epochs": 20,
"total_flos": 3.24907393591296e+16,
"trial_name": null,
"trial_params": null
}