{ "best_metric": 0.6491296227815271, "best_model_checkpoint": "./CARES/checkpoints/roberta-stratified/run-0/checkpoint-3266", "epoch": 23.163120567375888, "global_step": 3266, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.01, "eval_loss": 0.3462072014808655, "eval_macro_f1": 0.0007381889763779527, "eval_macro_precision": 0.0625, "eval_macro_recall": 0.0003712871287128713, "eval_micro_f1": 0.0039447731755424065, "eval_micro_precision": 1.0, "eval_micro_recall": 0.001976284584980237, "eval_runtime": 2.8803, "eval_samples_per_second": 337.116, "eval_steps_per_second": 10.763, "step": 142 }, { "epoch": 2.01, "eval_loss": 0.26299169659614563, "eval_macro_f1": 0.03133608815426997, "eval_macro_precision": 0.051470588235294115, "eval_macro_recall": 0.022524752475247524, "eval_micro_f1": 0.20931569867740082, "eval_micro_precision": 0.8235294117647058, "eval_micro_recall": 0.11989459815546773, "eval_runtime": 2.7712, "eval_samples_per_second": 350.396, "eval_steps_per_second": 11.187, "step": 284 }, { "epoch": 3.02, "eval_loss": 0.2253277599811554, "eval_macro_f1": 0.06167745523885891, "eval_macro_precision": 0.16397973284200829, "eval_macro_recall": 0.06444494429328504, "eval_micro_f1": 0.43727272727272726, "eval_micro_precision": 0.7052785923753666, "eval_micro_recall": 0.3168642951251647, "eval_runtime": 2.7754, "eval_samples_per_second": 349.86, "eval_steps_per_second": 11.17, "step": 426 }, { "epoch": 3.55, "learning_rate": 1.858287403029302e-05, "loss": 0.3344, "step": 500 }, { "epoch": 4.03, "eval_loss": 0.19568884372711182, "eval_macro_f1": 0.19533405609363325, "eval_macro_precision": 0.3105566636324695, "eval_macro_recall": 0.1838257881936085, "eval_micro_f1": 0.6221161495624502, "eval_micro_precision": 0.785140562248996, "eval_micro_recall": 0.5151515151515151, "eval_runtime": 2.775, "eval_samples_per_second": 349.909, "eval_steps_per_second": 11.171, "step": 568 }, { "epoch": 5.04, "eval_loss": 0.16411657631397247, "eval_macro_f1": 0.2724936969098707, "eval_macro_precision": 0.3798299682715689, "eval_macro_recall": 0.24456296636870606, "eval_micro_f1": 0.6832491255343957, "eval_micro_precision": 0.833175355450237, "eval_micro_recall": 0.5790513833992095, "eval_runtime": 2.7735, "eval_samples_per_second": 350.097, "eval_steps_per_second": 11.177, "step": 710 }, { "epoch": 6.04, "eval_loss": 0.15434952080249786, "eval_macro_f1": 0.33512731711181853, "eval_macro_precision": 0.42023284164245034, "eval_macro_recall": 0.31527094611085327, "eval_micro_f1": 0.7207672445592034, "eval_micro_precision": 0.8189438390611903, "eval_micro_recall": 0.6436100131752306, "eval_runtime": 2.7765, "eval_samples_per_second": 349.72, "eval_steps_per_second": 11.165, "step": 852 }, { "epoch": 7.05, "eval_loss": 0.14012500643730164, "eval_macro_f1": 0.3798067714294707, "eval_macro_precision": 0.5469114307081249, "eval_macro_recall": 0.3317304014614234, "eval_micro_f1": 0.7399624765478424, "eval_micro_precision": 0.8596338273757629, "eval_micro_recall": 0.6495388669301713, "eval_runtime": 2.774, "eval_samples_per_second": 350.033, "eval_steps_per_second": 11.175, "step": 994 }, { "epoch": 7.09, "learning_rate": 3.0452387239092086e-05, "loss": 0.1403, "step": 1000 }, { "epoch": 8.06, "eval_loss": 0.1425222009420395, "eval_macro_f1": 0.4279380705267567, "eval_macro_precision": 0.603624913309443, "eval_macro_recall": 0.3703454980587147, "eval_micro_f1": 0.7544954128440367, "eval_micro_precision": 0.851698425849213, "eval_micro_recall": 0.6772068511198946, "eval_runtime": 2.7729, "eval_samples_per_second": 350.179, "eval_steps_per_second": 11.18, "step": 1136 }, { "epoch": 9.06, "eval_loss": 0.12965930998325348, "eval_macro_f1": 0.516908938978679, "eval_macro_precision": 0.6952135806026781, "eval_macro_recall": 0.4441065897269686, "eval_micro_f1": 0.7703488372093023, "eval_micro_precision": 0.8589951377633711, "eval_micro_recall": 0.6982872200263505, "eval_runtime": 2.7725, "eval_samples_per_second": 350.226, "eval_steps_per_second": 11.181, "step": 1278 }, { "epoch": 10.07, "eval_loss": 0.12852537631988525, "eval_macro_f1": 0.5388984906238257, "eval_macro_precision": 0.7130990451123878, "eval_macro_recall": 0.4711434333008039, "eval_micro_f1": 0.7730547550432276, "eval_micro_precision": 0.8529411764705882, "eval_micro_recall": 0.7068511198945981, "eval_runtime": 2.7737, "eval_samples_per_second": 350.077, "eval_steps_per_second": 11.176, "step": 1420 }, { "epoch": 10.64, "learning_rate": 2.929008238263819e-05, "loss": 0.0558, "step": 1500 }, { "epoch": 11.08, "eval_loss": 0.13507980108261108, "eval_macro_f1": 0.5653547754610186, "eval_macro_precision": 0.697025101599949, "eval_macro_recall": 0.49809191604893205, "eval_micro_f1": 0.7797808412866738, "eval_micro_precision": 0.8413424866514111, "eval_micro_recall": 0.7266139657444005, "eval_runtime": 2.7736, "eval_samples_per_second": 350.083, "eval_steps_per_second": 11.177, "step": 1562 }, { "epoch": 12.09, "eval_loss": 0.1355280876159668, "eval_macro_f1": 0.5794821565833095, "eval_macro_precision": 0.6818697605893614, "eval_macro_recall": 0.5236900724860433, "eval_micro_f1": 0.7849877236057524, "eval_micro_precision": 0.8394598649662416, "eval_micro_recall": 0.7371541501976284, "eval_runtime": 2.7759, "eval_samples_per_second": 349.801, "eval_steps_per_second": 11.168, "step": 1704 }, { "epoch": 13.09, "eval_loss": 0.13659390807151794, "eval_macro_f1": 0.5891915194817472, "eval_macro_precision": 0.69343533514632, "eval_macro_recall": 0.5302292848265341, "eval_micro_f1": 0.7858642407277815, "eval_micro_precision": 0.8380597014925373, "eval_micro_recall": 0.7397891963109354, "eval_runtime": 2.7739, "eval_samples_per_second": 350.043, "eval_steps_per_second": 11.175, "step": 1846 }, { "epoch": 14.1, "eval_loss": 0.13488434255123138, "eval_macro_f1": 0.5900914578255574, "eval_macro_precision": 0.6881746937300715, "eval_macro_recall": 0.5327336547593478, "eval_micro_f1": 0.7906326459279972, "eval_micro_precision": 0.842144452717796, "eval_micro_recall": 0.7450592885375494, "eval_runtime": 2.772, "eval_samples_per_second": 350.292, "eval_steps_per_second": 11.183, "step": 1988 }, { "epoch": 14.18, "learning_rate": 2.8127777526184293e-05, "loss": 0.0229, "step": 2000 }, { "epoch": 15.11, "eval_loss": 0.14160068333148956, "eval_macro_f1": 0.5920064296505824, "eval_macro_precision": 0.6848089552512611, "eval_macro_recall": 0.5382146732964487, "eval_micro_f1": 0.7904066736183525, "eval_micro_precision": 0.8366445916114791, "eval_micro_recall": 0.7490118577075099, "eval_runtime": 2.7738, "eval_samples_per_second": 350.061, "eval_steps_per_second": 11.176, "step": 2130 }, { "epoch": 16.11, "eval_loss": 0.14674818515777588, "eval_macro_f1": 0.5928173166365653, "eval_macro_precision": 0.6894481117094009, "eval_macro_recall": 0.5355030217646541, "eval_micro_f1": 0.7838118298166724, "eval_micro_precision": 0.8252002913328478, "eval_micro_recall": 0.7463768115942029, "eval_runtime": 2.7753, "eval_samples_per_second": 349.871, "eval_steps_per_second": 11.17, "step": 2272 }, { "epoch": 17.12, "eval_loss": 0.15975715219974518, "eval_macro_f1": 0.6260279452973108, "eval_macro_precision": 0.7038398537082291, "eval_macro_recall": 0.5866152515992036, "eval_micro_f1": 0.7704974271012006, "eval_micro_precision": 0.80386542591267, "eval_micro_recall": 0.7397891963109354, "eval_runtime": 2.773, "eval_samples_per_second": 350.168, "eval_steps_per_second": 11.179, "step": 2414 }, { "epoch": 17.73, "learning_rate": 2.6965472669730396e-05, "loss": 0.0137, "step": 2500 }, { "epoch": 18.13, "eval_loss": 0.15081025660037994, "eval_macro_f1": 0.6271302596876451, "eval_macro_precision": 0.7314709578369518, "eval_macro_recall": 0.5704921787075566, "eval_micro_f1": 0.7809989521480964, "eval_micro_precision": 0.8312267657992565, "eval_micro_recall": 0.7364953886693018, "eval_runtime": 2.7721, "eval_samples_per_second": 350.282, "eval_steps_per_second": 11.183, "step": 2556 }, { "epoch": 19.13, "eval_loss": 0.14972703158855438, "eval_macro_f1": 0.5953077373311189, "eval_macro_precision": 0.6771131830689183, "eval_macro_recall": 0.5419002355296083, "eval_micro_f1": 0.7900552486187846, "eval_micro_precision": 0.8301886792452831, "eval_micro_recall": 0.7536231884057971, "eval_runtime": 2.7725, "eval_samples_per_second": 350.229, "eval_steps_per_second": 11.181, "step": 2698 }, { "epoch": 20.14, "eval_loss": 0.16402311623096466, "eval_macro_f1": 0.5892530505072842, "eval_macro_precision": 0.6727972959139897, "eval_macro_recall": 0.5367187180351005, "eval_micro_f1": 0.779319916724497, "eval_micro_precision": 0.8233137829912024, "eval_micro_recall": 0.7397891963109354, "eval_runtime": 2.7746, "eval_samples_per_second": 349.963, "eval_steps_per_second": 11.173, "step": 2840 }, { "epoch": 21.15, "eval_loss": 0.16335928440093994, "eval_macro_f1": 0.6332863501223459, "eval_macro_precision": 0.7578187629295322, "eval_macro_recall": 0.5685989504723161, "eval_micro_f1": 0.7924791086350974, "eval_micro_precision": 0.8404726735598228, "eval_micro_recall": 0.7496706192358367, "eval_runtime": 2.7725, "eval_samples_per_second": 350.229, "eval_steps_per_second": 11.181, "step": 2982 }, { "epoch": 21.28, "learning_rate": 2.58031678132765e-05, "loss": 0.011, "step": 3000 }, { "epoch": 22.16, "eval_loss": 0.16375195980072021, "eval_macro_f1": 0.6343242676719218, "eval_macro_precision": 0.7473720060268463, "eval_macro_recall": 0.5714537925969617, "eval_micro_f1": 0.7909878682842287, "eval_micro_precision": 0.8346744696415508, "eval_micro_recall": 0.7516469038208169, "eval_runtime": 2.7735, "eval_samples_per_second": 350.093, "eval_steps_per_second": 11.177, "step": 3124 }, { "epoch": 23.16, "eval_loss": 0.16090567409992218, "eval_macro_f1": 0.6491296227815271, "eval_macro_precision": 0.7487080551943797, "eval_macro_recall": 0.5927079340096836, "eval_micro_f1": 0.7961299239806496, "eval_micro_precision": 0.8372093023255814, "eval_micro_recall": 0.758893280632411, "eval_runtime": 2.7735, "eval_samples_per_second": 350.094, "eval_steps_per_second": 11.177, "step": 3266 } ], "max_steps": 14100, "num_train_epochs": 100, "total_flos": 1.2446273222900736e+16, "trial_name": null, "trial_params": { "adam_epsilon": 6.356433345691159e-10, "learning_rate": 3.084757089028641e-05, "per_device_eval_batch_size": 32, "per_device_train_batch_size": 16, "seed": 326, "warmup_steps": 830, "weight_decay": 0.0009910374448883887 } }