{ "best_metric": 0.7985673319510695, "best_model_checkpoint": "InLegalBERT/checkpoint-3215", "epoch": 11.0, "eval_steps": 500, "global_step": 7073, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.78, "grad_norm": 6.225058078765869, "learning_rate": 4.742871954380509e-05, "loss": 1.065, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.7993803253292022, "eval_f1_macro": 0.6028764374506596, "eval_f1_micro": 0.7993803253292022, "eval_f1_weighted": 0.780434475079005, "eval_loss": 0.6395189166069031, "eval_macro_fpr": 0.01853296480715729, "eval_macro_sensitivity": 0.6307714529979154, "eval_macro_specificity": 0.9847175193097543, "eval_precision": 0.7817573111259606, "eval_precision_macro": 0.6194286814679265, "eval_recall": 0.7993803253292022, "eval_recall_macro": 0.6307714529979154, "eval_runtime": 29.4696, "eval_samples_per_second": 43.808, "eval_steps_per_second": 5.497, "eval_weighted_fpr": 0.017610661589719183, "eval_weighted_sensitivity": 0.7993803253292022, "eval_weighted_specificity": 0.9713824643171124, "step": 643 }, { "epoch": 1.56, "grad_norm": 8.756747245788574, "learning_rate": 4.483670295489891e-05, "loss": 0.5866, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.8187451587916343, "eval_f1_macro": 0.7276084501581802, "eval_f1_micro": 0.8187451587916342, "eval_f1_weighted": 0.8152450677099363, "eval_loss": 0.6906521916389465, "eval_macro_fpr": 0.01610571025604924, "eval_macro_sensitivity": 0.7365748487318147, "eval_macro_specificity": 0.9863504064994671, "eval_precision": 0.8198905458065566, "eval_precision_macro": 0.7284980957351811, "eval_recall": 0.8187451587916343, "eval_recall_macro": 0.7365748487318147, "eval_runtime": 57.1075, "eval_samples_per_second": 22.606, "eval_steps_per_second": 2.837, "eval_weighted_fpr": 0.015566790846194785, "eval_weighted_sensitivity": 0.8187451587916343, "eval_weighted_specificity": 0.976510938700373, "step": 1286 }, { "epoch": 2.33, "grad_norm": 8.241662979125977, "learning_rate": 4.224468636599274e-05, "loss": 0.4622, "step": 1500 }, { "epoch": 3.0, "eval_accuracy": 0.8179705654531371, "eval_f1_macro": 0.7282676953256336, "eval_f1_micro": 0.817970565453137, "eval_f1_weighted": 0.8150036178041801, "eval_loss": 0.8055915236473083, "eval_macro_fpr": 0.016168738933500626, "eval_macro_sensitivity": 0.737597935006265, "eval_macro_specificity": 0.9862931306285473, "eval_precision": 0.8137371356291269, "eval_precision_macro": 0.7226669118270935, "eval_recall": 0.8179705654531371, "eval_recall_macro": 0.737597935006265, "eval_runtime": 56.5581, "eval_samples_per_second": 22.826, "eval_steps_per_second": 2.864, "eval_weighted_fpr": 0.01564684732671949, "eval_weighted_sensitivity": 0.8179705654531371, "eval_weighted_specificity": 0.9764263939750704, "step": 1929 }, { "epoch": 3.11, "grad_norm": 5.718050003051758, "learning_rate": 3.9652669777086575e-05, "loss": 0.346, "step": 2000 }, { "epoch": 3.89, "grad_norm": 28.698440551757812, "learning_rate": 3.7060653188180404e-05, "loss": 0.2398, "step": 2500 }, { "epoch": 4.0, "eval_accuracy": 0.8171959721146398, "eval_f1_macro": 0.7407336443567221, "eval_f1_micro": 0.8171959721146398, "eval_f1_weighted": 0.8160506897887181, "eval_loss": 0.9310164451599121, "eval_macro_fpr": 0.016134598971936556, "eval_macro_sensitivity": 0.7424877811517157, "eval_macro_specificity": 0.9862275325034797, "eval_precision": 0.8234764041889034, "eval_precision_macro": 0.7660803499273104, "eval_recall": 0.8171959721146398, "eval_recall_macro": 0.7424877811517157, "eval_runtime": 58.71, "eval_samples_per_second": 21.989, "eval_steps_per_second": 2.759, "eval_weighted_fpr": 0.015727042516326804, "eval_weighted_sensitivity": 0.8171959721146398, "eval_weighted_specificity": 0.9762170154375561, "step": 2572 }, { "epoch": 4.67, "grad_norm": 8.432526588439941, "learning_rate": 3.447382063245205e-05, "loss": 0.1611, "step": 3000 }, { "epoch": 5.0, "eval_accuracy": 0.8303640588690937, "eval_f1_macro": 0.7985673319510695, "eval_f1_micro": 0.8303640588690937, "eval_f1_weighted": 0.8304130023375266, "eval_loss": 1.0763046741485596, "eval_macro_fpr": 0.014791024663918677, "eval_macro_sensitivity": 0.7918299512118986, "eval_macro_specificity": 0.9872504344639083, "eval_precision": 0.8362631231068205, "eval_precision_macro": 0.8174407838611729, "eval_recall": 0.8303640588690937, "eval_recall_macro": 0.7918299512118986, "eval_runtime": 71.8128, "eval_samples_per_second": 17.977, "eval_steps_per_second": 2.256, "eval_weighted_fpr": 0.014382347146516056, "eval_weighted_sensitivity": 0.8303640588690937, "eval_weighted_specificity": 0.978392458089531, "step": 3215 }, { "epoch": 5.44, "grad_norm": 0.10359270125627518, "learning_rate": 3.1881804043545884e-05, "loss": 0.1055, "step": 3500 }, { "epoch": 6.0, "eval_accuracy": 0.82571649883811, "eval_f1_macro": 0.786305894104014, "eval_f1_micro": 0.82571649883811, "eval_f1_weighted": 0.824551084474796, "eval_loss": 1.1377127170562744, "eval_macro_fpr": 0.015367631824352218, "eval_macro_sensitivity": 0.7810139173900718, "eval_macro_specificity": 0.9868830526127108, "eval_precision": 0.8275394319817495, "eval_precision_macro": 0.8039087862954798, "eval_recall": 0.82571649883811, "eval_recall_macro": 0.7810139173900718, "eval_runtime": 64.6694, "eval_samples_per_second": 19.963, "eval_steps_per_second": 2.505, "eval_weighted_fpr": 0.01485246550927454, "eval_weighted_sensitivity": 0.82571649883811, "eval_weighted_specificity": 0.9775292903525532, "step": 3858 }, { "epoch": 6.22, "grad_norm": 0.07212593406438828, "learning_rate": 2.928978745463971e-05, "loss": 0.1152, "step": 4000 }, { "epoch": 7.0, "grad_norm": 0.15883693099021912, "learning_rate": 2.669777086573354e-05, "loss": 0.0463, "step": 4500 }, { "epoch": 7.0, "eval_accuracy": 0.8071262587141751, "eval_f1_macro": 0.7661147949067431, "eval_f1_micro": 0.8071262587141751, "eval_f1_weighted": 0.8078396785965507, "eval_loss": 1.321502447128296, "eval_macro_fpr": 0.017229971151571283, "eval_macro_sensitivity": 0.7688718615633127, "eval_macro_specificity": 0.9855509290260365, "eval_precision": 0.8111033305830805, "eval_precision_macro": 0.7691608912363075, "eval_recall": 0.8071262587141751, "eval_recall_macro": 0.7688718615633127, "eval_runtime": 77.5552, "eval_samples_per_second": 16.646, "eval_steps_per_second": 2.089, "eval_weighted_fpr": 0.016782368403316036, "eval_weighted_sensitivity": 0.8071262587141751, "eval_weighted_specificity": 0.9761376766763701, "step": 4501 }, { "epoch": 7.78, "grad_norm": 0.003739361884072423, "learning_rate": 2.4105754276827372e-05, "loss": 0.031, "step": 5000 }, { "epoch": 8.0, "eval_accuracy": 0.820294345468629, "eval_f1_macro": 0.7689822136833361, "eval_f1_micro": 0.8202943454686291, "eval_f1_weighted": 0.8174949305492484, "eval_loss": 1.3483381271362305, "eval_macro_fpr": 0.01612405504801943, "eval_macro_sensitivity": 0.7726825622003671, "eval_macro_specificity": 0.9863605879945143, "eval_precision": 0.8169871783387602, "eval_precision_macro": 0.7773067113000881, "eval_recall": 0.820294345468629, "eval_recall_macro": 0.7726825622003671, "eval_runtime": 57.45, "eval_samples_per_second": 22.472, "eval_steps_per_second": 2.82, "eval_weighted_fpr": 0.015407092575375215, "eval_weighted_sensitivity": 0.820294345468629, "eval_weighted_specificity": 0.9751144744490858, "step": 5144 }, { "epoch": 8.55, "grad_norm": 0.008842483162879944, "learning_rate": 2.151892172109902e-05, "loss": 0.0202, "step": 5500 }, { "epoch": 9.0, "eval_accuracy": 0.8280402788536019, "eval_f1_macro": 0.7753479744325771, "eval_f1_micro": 0.8280402788536019, "eval_f1_weighted": 0.825561089776041, "eval_loss": 1.3729532957077026, "eval_macro_fpr": 0.015177813835714501, "eval_macro_sensitivity": 0.7802568330601105, "eval_macro_specificity": 0.9870641550691818, "eval_precision": 0.8263269633953413, "eval_precision_macro": 0.7817620978620624, "eval_recall": 0.8280402788536019, "eval_recall_macro": 0.7802568330601105, "eval_runtime": 69.0336, "eval_samples_per_second": 18.701, "eval_steps_per_second": 2.347, "eval_weighted_fpr": 0.014616802739004477, "eval_weighted_sensitivity": 0.8280402788536019, "eval_weighted_specificity": 0.977922047184124, "step": 5787 }, { "epoch": 9.33, "grad_norm": 0.0021986050996929407, "learning_rate": 1.8926905132192844e-05, "loss": 0.0133, "step": 6000 }, { "epoch": 10.0, "eval_accuracy": 0.8164213787761425, "eval_f1_macro": 0.7655491262772139, "eval_f1_micro": 0.8164213787761425, "eval_f1_weighted": 0.8134862058117613, "eval_loss": 1.5407140254974365, "eval_macro_fpr": 0.016532295658760295, "eval_macro_sensitivity": 0.7779131623925402, "eval_macro_specificity": 0.9861038702342115, "eval_precision": 0.8162862069364494, "eval_precision_macro": 0.7687513177071161, "eval_recall": 0.8164213787761425, "eval_recall_macro": 0.7779131623925402, "eval_runtime": 56.3906, "eval_samples_per_second": 22.894, "eval_steps_per_second": 2.873, "eval_weighted_fpr": 0.01580737677582872, "eval_weighted_sensitivity": 0.8164213787761425, "eval_weighted_specificity": 0.9751366747370328, "step": 6430 }, { "epoch": 10.11, "grad_norm": 0.006160721182823181, "learning_rate": 1.6334888543286677e-05, "loss": 0.0059, "step": 6500 }, { "epoch": 10.89, "grad_norm": 0.04344305023550987, "learning_rate": 1.374287195438051e-05, "loss": 0.0051, "step": 7000 }, { "epoch": 11.0, "eval_accuracy": 0.8226181254841208, "eval_f1_macro": 0.7743760234813677, "eval_f1_micro": 0.8226181254841208, "eval_f1_weighted": 0.8234395708523029, "eval_loss": 1.5235263109207153, "eval_macro_fpr": 0.015631611736392083, "eval_macro_sensitivity": 0.7679556612821589, "eval_macro_specificity": 0.9866336477591318, "eval_precision": 0.8265113395379454, "eval_precision_macro": 0.7900275628638693, "eval_recall": 0.8226181254841208, "eval_recall_macro": 0.7679556612821589, "eval_runtime": 61.2541, "eval_samples_per_second": 21.076, "eval_steps_per_second": 2.645, "eval_weighted_fpr": 0.01516857653838511, "eval_weighted_sensitivity": 0.8226181254841208, "eval_weighted_specificity": 0.9768865909028567, "step": 7073 } ], "logging_steps": 500, "max_steps": 9645, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 500, "total_flos": 1.4872246338926592e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }