{ "best_metric": 0.7025796661608499, "best_model_checkpoint": "./results/checkpoint-4434", "epoch": 15.0, "global_step": 22170, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.68, "learning_rate": 2e-06, "loss": 0.4736, "step": 1000 }, { "epoch": 1.0, "eval_accuracy": 0.8308534531162268, "eval_f1": 0.682141915061989, "eval_loss": 0.37968066334724426, "eval_precision": 0.6996753246753247, "eval_recall": 0.6654657745753989, "eval_runtime": 2.7102, "eval_samples_per_second": 68.261, "eval_steps_per_second": 68.261, "step": 1478 }, { "epoch": 1.35, "learning_rate": 1.9055266887104394e-06, "loss": 0.3656, "step": 2000 }, { "epoch": 2.0, "eval_accuracy": 0.826221224031443, "eval_f1": 0.6371629542790153, "eval_loss": 0.37435096502304077, "eval_precision": 0.7399591558883595, "eval_recall": 0.5594441585177561, "eval_runtime": 2.6167, "eval_samples_per_second": 70.701, "eval_steps_per_second": 70.701, "step": 2956 }, { "epoch": 2.03, "learning_rate": 1.8110533774208785e-06, "loss": 0.3599, "step": 3000 }, { "epoch": 2.71, "learning_rate": 1.716580066131318e-06, "loss": 0.3458, "step": 4000 }, { "epoch": 3.0, "eval_accuracy": 0.8349241998877035, "eval_f1": 0.7025796661608499, "eval_loss": 0.36284855008125305, "eval_precision": 0.6907011437095972, "eval_recall": 0.7148739063304169, "eval_runtime": 2.6495, "eval_samples_per_second": 69.825, "eval_steps_per_second": 69.825, "step": 4434 }, { "epoch": 3.38, "learning_rate": 1.622106754841757e-06, "loss": 0.337, "step": 5000 }, { "epoch": 4.0, "eval_accuracy": 0.8281864121280179, "eval_f1": 0.651678998292544, "eval_loss": 0.37390556931495667, "eval_precision": 0.728835136855506, "eval_recall": 0.5892949047864128, "eval_runtime": 2.6125, "eval_samples_per_second": 70.813, "eval_steps_per_second": 70.813, "step": 5912 }, { "epoch": 4.06, "learning_rate": 1.5276334435521965e-06, "loss": 0.3239, "step": 6000 }, { "epoch": 4.74, "learning_rate": 1.4331601322626356e-06, "loss": 0.3015, "step": 7000 }, { "epoch": 5.0, "eval_accuracy": 0.8357664233576643, "eval_f1": 0.6978305785123967, "eval_loss": 0.36176469922065735, "eval_precision": 0.7003628823224468, "eval_recall": 0.6953165208440556, "eval_runtime": 2.8697, "eval_samples_per_second": 64.467, "eval_steps_per_second": 64.467, "step": 7390 }, { "epoch": 5.41, "learning_rate": 1.338686820973075e-06, "loss": 0.2884, "step": 8000 }, { "epoch": 6.0, "eval_accuracy": 0.833941605839416, "eval_f1": 0.6902330452998167, "eval_loss": 0.37303170561790466, "eval_precision": 0.7025586353944563, "eval_recall": 0.6783324755532681, "eval_runtime": 2.7072, "eval_samples_per_second": 68.337, "eval_steps_per_second": 68.337, "step": 8868 }, { "epoch": 6.09, "learning_rate": 1.2442135096835144e-06, "loss": 0.264, "step": 9000 }, { "epoch": 6.77, "learning_rate": 1.1497401983939536e-06, "loss": 0.2324, "step": 10000 }, { "epoch": 7.0, "eval_accuracy": 0.8322571588994947, "eval_f1": 0.6844467916556641, "eval_loss": 0.3992096483707428, "eval_precision": 0.702819956616052, "eval_recall": 0.6670097786927431, "eval_runtime": 2.6272, "eval_samples_per_second": 70.416, "eval_steps_per_second": 70.416, "step": 10346 }, { "epoch": 7.44, "learning_rate": 1.0552668871043931e-06, "loss": 0.1965, "step": 11000 }, { "epoch": 8.0, "eval_accuracy": 0.8250982594048287, "eval_f1": 0.6525376464026771, "eval_loss": 0.44216522574424744, "eval_precision": 0.7121119902617163, "eval_recall": 0.602161605764282, "eval_runtime": 2.622, "eval_samples_per_second": 70.557, "eval_steps_per_second": 70.557, "step": 11824 }, { "epoch": 8.12, "learning_rate": 9.607935758148322e-07, "loss": 0.182, "step": 12000 }, { "epoch": 8.8, "learning_rate": 8.663202645252716e-07, "loss": 0.1405, "step": 13000 }, { "epoch": 9.0, "eval_accuracy": 0.823694553621561, "eval_f1": 0.6694736842105264, "eval_loss": 0.49459609389305115, "eval_precision": 0.6849757673667205, "eval_recall": 0.6546577457539887, "eval_runtime": 2.6563, "eval_samples_per_second": 69.645, "eval_steps_per_second": 69.645, "step": 13302 }, { "epoch": 9.47, "learning_rate": 7.718469532357109e-07, "loss": 0.1149, "step": 14000 }, { "epoch": 10.0, "eval_accuracy": 0.8238349241998877, "eval_f1": 0.6766297346044834, "eval_loss": 0.5486593842506409, "eval_precision": 0.6775025799793601, "eval_recall": 0.6757591353576943, "eval_runtime": 2.6698, "eval_samples_per_second": 69.293, "eval_steps_per_second": 69.293, "step": 14780 }, { "epoch": 10.15, "learning_rate": 6.773736419461502e-07, "loss": 0.1013, "step": 15000 }, { "epoch": 10.83, "learning_rate": 5.829003306565895e-07, "loss": 0.0808, "step": 16000 }, { "epoch": 11.0, "eval_accuracy": 0.8252386299831556, "eval_f1": 0.6767073487405868, "eval_loss": 0.6354735493659973, "eval_precision": 0.6829140461215933, "eval_recall": 0.6706124549665465, "eval_runtime": 2.7422, "eval_samples_per_second": 67.465, "eval_steps_per_second": 67.465, "step": 16258 }, { "epoch": 11.5, "learning_rate": 4.884270193670288e-07, "loss": 0.0627, "step": 17000 }, { "epoch": 12.0, "eval_accuracy": 0.8260808534531162, "eval_f1": 0.6648634027589938, "eval_loss": 0.6998042464256287, "eval_precision": 0.7006841505131128, "eval_recall": 0.6325270200720535, "eval_runtime": 2.7874, "eval_samples_per_second": 66.371, "eval_steps_per_second": 66.371, "step": 17736 }, { "epoch": 12.18, "learning_rate": 3.939537080774681e-07, "loss": 0.0496, "step": 18000 }, { "epoch": 12.86, "learning_rate": 2.9948039678790744e-07, "loss": 0.0399, "step": 19000 }, { "epoch": 13.0, "eval_accuracy": 0.8258001122964627, "eval_f1": 0.6703851261620186, "eval_loss": 0.7429465651512146, "eval_precision": 0.6926454445664105, "eval_recall": 0.649511065362841, "eval_runtime": 2.7701, "eval_samples_per_second": 66.784, "eval_steps_per_second": 66.784, "step": 19214 }, { "epoch": 13.53, "learning_rate": 2.0500708549834672e-07, "loss": 0.032, "step": 20000 }, { "epoch": 14.0, "eval_accuracy": 0.8218697361033127, "eval_f1": 0.6556309362279512, "eval_loss": 0.7868731617927551, "eval_precision": 0.6934557979334098, "eval_recall": 0.6217189912506433, "eval_runtime": 2.7709, "eval_samples_per_second": 66.766, "eval_steps_per_second": 66.766, "step": 20692 }, { "epoch": 14.21, "learning_rate": 1.1053377420878601e-07, "loss": 0.0274, "step": 21000 }, { "epoch": 14.88, "learning_rate": 1.606046291922532e-08, "loss": 0.0233, "step": 22000 }, { "epoch": 15.0, "eval_accuracy": 0.8235541830432341, "eval_f1": 0.6621875839828004, "eval_loss": 0.7936236262321472, "eval_precision": 0.6929133858267716, "eval_recall": 0.6340710241893979, "eval_runtime": 2.7604, "eval_samples_per_second": 67.019, "eval_steps_per_second": 67.019, "step": 22170 } ], "max_steps": 22170, "num_train_epochs": 15, "total_flos": 7912225034580240.0, "trial_name": null, "trial_params": null }