{ "best_metric": 0.7849208402678758, "best_model_checkpoint": "tiny-llama/checkpoint-1284", "epoch": 8.0, "eval_steps": 500, "global_step": 5138, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.78, "learning_rate": 4.614485981308411e-05, "loss": 1.0444, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.8055770720371804, "eval_f1_macro": 0.6986433125217109, "eval_f1_micro": 0.8055770720371804, "eval_f1_weighted": 0.8014415131779974, "eval_loss": 0.5968185067176819, "eval_macro_fpr": 0.017477297659887057, "eval_macro_sensitivity": 0.6995136011888944, "eval_macro_specificity": 0.9852356804676051, "eval_precision": 0.8049997976909469, "eval_precision_macro": 0.7121994461809698, "eval_recall": 0.8055770720371804, "eval_recall_macro": 0.6995136011888944, "eval_runtime": 1398.4578, "eval_samples_per_second": 0.923, "eval_steps_per_second": 0.462, "eval_weighted_fpr": 0.01694686381743299, "eval_weighted_sensitivity": 0.8055770720371804, "eval_weighted_specificity": 0.9729581349768969, "step": 642 }, { "epoch": 1.56, "learning_rate": 4.2250778816199374e-05, "loss": 0.4788, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.8195197521301317, "eval_f1_macro": 0.7849208402678758, "eval_f1_micro": 0.8195197521301317, "eval_f1_weighted": 0.8172221848923289, "eval_loss": 0.696565568447113, "eval_macro_fpr": 0.016141504898788652, "eval_macro_sensitivity": 0.782528240704334, "eval_macro_specificity": 0.9863319896705219, "eval_precision": 0.8221579556499354, "eval_precision_macro": 0.8091762597974816, "eval_recall": 0.8195197521301317, "eval_recall_macro": 0.782528240704334, "eval_runtime": 1392.2226, "eval_samples_per_second": 0.927, "eval_steps_per_second": 0.464, "eval_weighted_fpr": 0.01548687271518777, "eval_weighted_sensitivity": 0.8195197521301317, "eval_weighted_specificity": 0.9754600929277001, "step": 1284 }, { "epoch": 2.34, "learning_rate": 3.836448598130841e-05, "loss": 0.3354, "step": 1500 }, { "epoch": 3.0, "eval_accuracy": 0.8326878388845856, "eval_f1_macro": 0.7742114597713703, "eval_f1_micro": 0.8326878388845856, "eval_f1_weighted": 0.8281979563082781, "eval_loss": 0.8046467304229736, "eval_macro_fpr": 0.014819935806968648, "eval_macro_sensitivity": 0.7581833318977771, "eval_macro_specificity": 0.9872339538797621, "eval_precision": 0.8276269450338689, "eval_precision_macro": 0.8057581057880366, "eval_recall": 0.8326878388845856, "eval_recall_macro": 0.7581833318977771, "eval_runtime": 1396.4981, "eval_samples_per_second": 0.924, "eval_steps_per_second": 0.463, "eval_weighted_fpr": 0.01414908947988995, "eval_weighted_sensitivity": 0.8326878388845856, "eval_weighted_specificity": 0.9758214693118433, "step": 1926 }, { "epoch": 3.11, "learning_rate": 3.4470404984423676e-05, "loss": 0.2132, "step": 2000 }, { "epoch": 3.89, "learning_rate": 3.057632398753894e-05, "loss": 0.0571, "step": 2500 }, { "epoch": 4.0, "eval_accuracy": 0.8264910921766073, "eval_f1_macro": 0.7690440255949657, "eval_f1_micro": 0.8264910921766073, "eval_f1_weighted": 0.8261718999247398, "eval_loss": 1.114306926727295, "eval_macro_fpr": 0.015230795356958942, "eval_macro_sensitivity": 0.7762905253241136, "eval_macro_specificity": 0.9869101098294825, "eval_precision": 0.8312136257886843, "eval_precision_macro": 0.7904073004865061, "eval_recall": 0.8264910921766073, "eval_recall_macro": 0.7762905253241136, "eval_runtime": 1399.2136, "eval_samples_per_second": 0.923, "eval_steps_per_second": 0.462, "eval_weighted_fpr": 0.014773776546629732, "eval_weighted_sensitivity": 0.8264910921766073, "eval_weighted_specificity": 0.9771605552656325, "step": 2569 }, { "epoch": 4.67, "learning_rate": 2.6682242990654205e-05, "loss": 0.0187, "step": 3000 }, { "epoch": 5.0, "eval_accuracy": 0.8319132455460883, "eval_f1_macro": 0.7637547171369754, "eval_f1_micro": 0.8319132455460883, "eval_f1_weighted": 0.8302763212360192, "eval_loss": 1.110449194908142, "eval_macro_fpr": 0.014862942310551715, "eval_macro_sensitivity": 0.7723638117326753, "eval_macro_specificity": 0.9872593002552416, "eval_precision": 0.8316344032874086, "eval_precision_macro": 0.7744559231287563, "eval_recall": 0.8319132455460883, "eval_recall_macro": 0.7723638117326753, "eval_runtime": 1397.1179, "eval_samples_per_second": 0.924, "eval_steps_per_second": 0.462, "eval_weighted_fpr": 0.014226709499770536, "eval_weighted_sensitivity": 0.8319132455460883, "eval_weighted_specificity": 0.9769762582825359, "step": 3211 }, { "epoch": 5.45, "learning_rate": 2.2788161993769472e-05, "loss": 0.0071, "step": 3500 }, { "epoch": 6.0, "eval_accuracy": 0.8241673121611154, "eval_f1_macro": 0.745099817904837, "eval_f1_micro": 0.8241673121611154, "eval_f1_weighted": 0.8209070583364038, "eval_loss": 1.144503116607666, "eval_macro_fpr": 0.01570185641881881, "eval_macro_sensitivity": 0.738352723089731, "eval_macro_specificity": 0.9866424210438552, "eval_precision": 0.8210083679630991, "eval_precision_macro": 0.7683618012500858, "eval_recall": 0.8241673121611154, "eval_recall_macro": 0.738352723089731, "eval_runtime": 1397.6847, "eval_samples_per_second": 0.924, "eval_steps_per_second": 0.462, "eval_weighted_fpr": 0.015010249289162203, "eval_weighted_sensitivity": 0.8241673121611154, "eval_weighted_specificity": 0.9754690034967137, "step": 3853 }, { "epoch": 6.23, "learning_rate": 1.8894080996884735e-05, "loss": 0.0002, "step": 4000 }, { "epoch": 7.0, "eval_accuracy": 0.8326878388845856, "eval_f1_macro": 0.7617153787626418, "eval_f1_micro": 0.8326878388845856, "eval_f1_weighted": 0.8292501117913054, "eval_loss": 1.2032252550125122, "eval_macro_fpr": 0.014823835936918472, "eval_macro_sensitivity": 0.752929076567028, "eval_macro_specificity": 0.9872777146881934, "eval_precision": 0.8301989992809635, "eval_precision_macro": 0.7985351753450873, "eval_recall": 0.8326878388845856, "eval_recall_macro": 0.752929076567028, "eval_runtime": 1403.2825, "eval_samples_per_second": 0.92, "eval_steps_per_second": 0.46, "eval_weighted_fpr": 0.01414908947988995, "eval_weighted_sensitivity": 0.8326878388845856, "eval_weighted_specificity": 0.9764778814383137, "step": 4495 }, { "epoch": 7.01, "learning_rate": 1.5e-05, "loss": 0.0028, "step": 4500 }, { "epoch": 7.79, "learning_rate": 1.1105919003115265e-05, "loss": 0.0028, "step": 5000 }, { "epoch": 8.0, "eval_accuracy": 0.82571649883811, "eval_f1_macro": 0.755227710037701, "eval_f1_micro": 0.82571649883811, "eval_f1_weighted": 0.8229426380981488, "eval_loss": 1.1918259859085083, "eval_macro_fpr": 0.015546599693760165, "eval_macro_sensitivity": 0.7493334411200354, "eval_macro_specificity": 0.986754396380849, "eval_precision": 0.8225901566549486, "eval_precision_macro": 0.7737967354767298, "eval_recall": 0.82571649883811, "eval_recall_macro": 0.7493334411200354, "eval_runtime": 1398.648, "eval_samples_per_second": 0.923, "eval_steps_per_second": 0.462, "eval_weighted_fpr": 0.01485246550927454, "eval_weighted_sensitivity": 0.82571649883811, "eval_weighted_specificity": 0.975599446874626, "step": 5138 } ], "logging_steps": 500, "max_steps": 6420, "num_train_epochs": 10, "save_steps": 500, "total_flos": 4.958193946019758e+17, "trial_name": null, "trial_params": null }