{ "best_metric": 0.691931962966919, "best_model_checkpoint": "hBERTv1_new_pretrain_w_init_48_ver2_rte/checkpoint-39", "epoch": 6.0, "eval_steps": 500, "global_step": 234, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 3.733333333333334e-05, "loss": 0.7175, "step": 39 }, { "epoch": 1.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.691931962966919, "eval_runtime": 1.4647, "eval_samples_per_second": 189.113, "eval_steps_per_second": 3.414, "step": 39 }, { "epoch": 2.0, "learning_rate": 3.466666666666667e-05, "loss": 0.7099, "step": 78 }, { "epoch": 2.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.7006047368049622, "eval_runtime": 1.4696, "eval_samples_per_second": 188.487, "eval_steps_per_second": 3.402, "step": 78 }, { "epoch": 3.0, "learning_rate": 3.2000000000000005e-05, "loss": 0.7111, "step": 117 }, { "epoch": 3.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.6926825046539307, "eval_runtime": 1.4718, "eval_samples_per_second": 188.209, "eval_steps_per_second": 3.397, "step": 117 }, { "epoch": 4.0, "learning_rate": 2.9333333333333333e-05, "loss": 0.7011, "step": 156 }, { "epoch": 4.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.6976003050804138, "eval_runtime": 1.4634, "eval_samples_per_second": 189.29, "eval_steps_per_second": 3.417, "step": 156 }, { "epoch": 5.0, "learning_rate": 2.6666666666666667e-05, "loss": 0.7021, "step": 195 }, { "epoch": 5.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.695507824420929, "eval_runtime": 1.4648, "eval_samples_per_second": 189.108, "eval_steps_per_second": 3.414, "step": 195 }, { "epoch": 6.0, "learning_rate": 2.4e-05, "loss": 0.6986, "step": 234 }, { "epoch": 6.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.7077711820602417, "eval_runtime": 1.4767, "eval_samples_per_second": 187.58, "eval_steps_per_second": 3.386, "step": 234 }, { "epoch": 6.0, "step": 234, "total_flos": 2212565757198336.0, "train_loss": 0.7067025421012161, "train_runtime": 264.6778, "train_samples_per_second": 141.115, "train_steps_per_second": 2.21 } ], "logging_steps": 1, "max_steps": 585, "num_train_epochs": 15, "save_steps": 500, "total_flos": 2212565757198336.0, "trial_name": null, "trial_params": null }