{ "best_metric": 0.4374756335282651, "best_model_checkpoint": "./results/checkpoint-2724", "epoch": 6.0, "eval_steps": 500, "global_step": 2724, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_auc_pr": 0.3741202821573497, "eval_auc_roc": 0.5344017094017094, "eval_loss": 0.6598178148269653, "eval_runtime": 64.1274, "eval_samples_per_second": 1.575, "eval_steps_per_second": 0.795, "step": 454 }, { "epoch": 1.1, "grad_norm": 2.729569673538208, "learning_rate": 3.2701908957415565e-05, "loss": 0.7137, "step": 500 }, { "epoch": 2.0, "eval_auc_pr": 0.3336610256770358, "eval_auc_roc": 0.43162393162393164, "eval_loss": 0.6951384544372559, "eval_runtime": 63.851, "eval_samples_per_second": 1.582, "eval_steps_per_second": 0.799, "step": 908 }, { "epoch": 2.2, "grad_norm": 4.023996829986572, "learning_rate": 2.535976505139501e-05, "loss": 0.6916, "step": 1000 }, { "epoch": 3.0, "eval_auc_pr": 0.3415869066362487, "eval_auc_roc": 0.4920940170940171, "eval_loss": 0.653586208820343, "eval_runtime": 63.8711, "eval_samples_per_second": 1.581, "eval_steps_per_second": 0.798, "step": 1362 }, { "epoch": 3.3, "grad_norm": 3.374420642852783, "learning_rate": 1.801762114537445e-05, "loss": 0.7041, "step": 1500 }, { "epoch": 4.0, "eval_auc_pr": 0.3571268522434573, "eval_auc_roc": 0.5004273504273504, "eval_loss": 0.6515412330627441, "eval_runtime": 63.7749, "eval_samples_per_second": 1.584, "eval_steps_per_second": 0.8, "step": 1816 }, { "epoch": 4.41, "grad_norm": 2.160428285598755, "learning_rate": 1.0675477239353893e-05, "loss": 0.6616, "step": 2000 }, { "epoch": 5.0, "eval_auc_pr": 0.39159998056113526, "eval_auc_roc": 0.5651709401709402, "eval_loss": 0.6515726447105408, "eval_runtime": 63.9137, "eval_samples_per_second": 1.58, "eval_steps_per_second": 0.798, "step": 2270 }, { "epoch": 5.51, "grad_norm": 1.967381477355957, "learning_rate": 3.3333333333333333e-06, "loss": 0.6673, "step": 2500 }, { "epoch": 6.0, "eval_auc_pr": 0.4374756335282651, "eval_auc_roc": 0.6269230769230769, "eval_loss": 0.6512559652328491, "eval_runtime": 63.8923, "eval_samples_per_second": 1.581, "eval_steps_per_second": 0.798, "step": 2724 } ], "logging_steps": 500, "max_steps": 2724, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "total_flos": 1.4298345882353664e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }