hBERTv1_new_pretrain_w_init__sst2 / trainer_state.json
gokuls's picture
End of training
a3a117d
{
"best_metric": 0.4606306850910187,
"best_model_checkpoint": "hBERTv1_new_pretrain_w_init__sst2/checkpoint-527",
"epoch": 6.0,
"global_step": 3162,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 3.9200000000000004e-05,
"loss": 0.363,
"step": 527
},
{
"epoch": 1.0,
"eval_accuracy": 0.8038990825688074,
"eval_loss": 0.4606306850910187,
"eval_runtime": 1.5758,
"eval_samples_per_second": 553.365,
"eval_steps_per_second": 4.442,
"step": 527
},
{
"epoch": 2.0,
"learning_rate": 3.8400000000000005e-05,
"loss": 0.2256,
"step": 1054
},
{
"epoch": 2.0,
"eval_accuracy": 0.8119266055045872,
"eval_loss": 0.6466029286384583,
"eval_runtime": 1.581,
"eval_samples_per_second": 551.543,
"eval_steps_per_second": 4.428,
"step": 1054
},
{
"epoch": 3.0,
"learning_rate": 3.76e-05,
"loss": 0.1754,
"step": 1581
},
{
"epoch": 3.0,
"eval_accuracy": 0.8176605504587156,
"eval_loss": 0.5100513696670532,
"eval_runtime": 1.5786,
"eval_samples_per_second": 552.387,
"eval_steps_per_second": 4.434,
"step": 1581
},
{
"epoch": 4.0,
"learning_rate": 3.680000000000001e-05,
"loss": 0.1394,
"step": 2108
},
{
"epoch": 4.0,
"eval_accuracy": 0.8176605504587156,
"eval_loss": 0.49214717745780945,
"eval_runtime": 1.5822,
"eval_samples_per_second": 551.116,
"eval_steps_per_second": 4.424,
"step": 2108
},
{
"epoch": 5.0,
"learning_rate": 3.6e-05,
"loss": 0.1111,
"step": 2635
},
{
"epoch": 5.0,
"eval_accuracy": 0.819954128440367,
"eval_loss": 0.5109940767288208,
"eval_runtime": 1.5752,
"eval_samples_per_second": 553.567,
"eval_steps_per_second": 4.444,
"step": 2635
},
{
"epoch": 6.0,
"learning_rate": 3.52e-05,
"loss": 0.0937,
"step": 3162
},
{
"epoch": 6.0,
"eval_accuracy": 0.8211009174311926,
"eval_loss": 0.6468406319618225,
"eval_runtime": 1.5781,
"eval_samples_per_second": 552.564,
"eval_steps_per_second": 4.436,
"step": 3162
},
{
"epoch": 6.0,
"step": 3162,
"total_flos": 5.984501559302554e+16,
"train_loss": 0.18471441202567845,
"train_runtime": 2261.4914,
"train_samples_per_second": 1489.039,
"train_steps_per_second": 11.652
}
],
"max_steps": 26350,
"num_train_epochs": 50,
"total_flos": 5.984501559302554e+16,
"trial_name": null,
"trial_params": null
}