{ "best_metric": 0.7672541719320296, "best_model_checkpoint": "./CARES/checkpoints/roberta-stratified/run-6/checkpoint-3550", "epoch": 50.0, "global_step": 3550, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.0, "eval_loss": 0.24808131158351898, "eval_macro_f1": 0.05481647030175482, "eval_macro_precision": 0.17800453514739228, "eval_macro_recall": 0.03330847588023425, "eval_micro_f1": 0.13594611145131658, "eval_micro_precision": 0.9568965517241379, "eval_micro_recall": 0.07317073170731707, "eval_runtime": 2.8186, "eval_samples_per_second": 342.724, "eval_steps_per_second": 21.642, "step": 142 }, { "epoch": 4.0, "eval_loss": 0.16814221441745758, "eval_macro_f1": 0.37346208328620767, "eval_macro_precision": 0.4376472360907343, "eval_macro_recall": 0.3342883491414155, "eval_micro_f1": 0.7248062015503876, "eval_micro_precision": 0.8795860771401693, "eval_micro_recall": 0.6163480553724456, "eval_runtime": 2.8192, "eval_samples_per_second": 342.653, "eval_steps_per_second": 21.638, "step": 284 }, { "epoch": 6.0, "eval_loss": 0.13150478899478912, "eval_macro_f1": 0.4319986220599845, "eval_macro_precision": 0.6389953058630822, "eval_macro_recall": 0.37598442828818834, "eval_micro_f1": 0.7641723356009071, "eval_micro_precision": 0.895482728077945, "eval_micro_recall": 0.6664469347396177, "eval_runtime": 2.8191, "eval_samples_per_second": 342.658, "eval_steps_per_second": 21.638, "step": 426 }, { "epoch": 7.04, "learning_rate": 2.8662355855330125e-05, "loss": 0.2172, "step": 500 }, { "epoch": 8.0, "eval_loss": 0.1121998205780983, "eval_macro_f1": 0.5386615940425785, "eval_macro_precision": 0.7631201629660762, "eval_macro_recall": 0.46714612883136086, "eval_micro_f1": 0.8077485380116959, "eval_micro_precision": 0.9064807219031994, "eval_micro_recall": 0.7284113381674358, "eval_runtime": 2.8196, "eval_samples_per_second": 342.598, "eval_steps_per_second": 21.634, "step": 568 }, { "epoch": 10.0, "eval_loss": 0.10608664900064468, "eval_macro_f1": 0.5851323545796894, "eval_macro_precision": 0.7572045278273729, "eval_macro_recall": 0.5132727928856573, "eval_micro_f1": 0.8146802325581396, "eval_micro_precision": 0.9076923076923077, "eval_micro_recall": 0.7389584706657878, "eval_runtime": 2.8204, "eval_samples_per_second": 342.507, "eval_steps_per_second": 21.628, "step": 710 }, { "epoch": 12.0, "eval_loss": 0.100788913667202, "eval_macro_f1": 0.628825318779795, "eval_macro_precision": 0.750817062947988, "eval_macro_recall": 0.5578116563013593, "eval_micro_f1": 0.8245363766048501, "eval_micro_precision": 0.8982128982128982, "eval_micro_recall": 0.7620303230059328, "eval_runtime": 2.8211, "eval_samples_per_second": 342.425, "eval_steps_per_second": 21.623, "step": 852 }, { "epoch": 14.0, "eval_loss": 0.09952697157859802, "eval_macro_f1": 0.648367052483795, "eval_macro_precision": 0.7408904755242313, "eval_macro_recall": 0.5923039028415692, "eval_micro_f1": 0.8379418970948547, "eval_micro_precision": 0.8932835820895523, "eval_micro_recall": 0.7890573500329597, "eval_runtime": 2.8203, "eval_samples_per_second": 342.519, "eval_steps_per_second": 21.629, "step": 994 }, { "epoch": 14.08, "learning_rate": 2.6490965260229357e-05, "loss": 0.0388, "step": 1000 }, { "epoch": 16.0, "eval_loss": 0.10003374516963959, "eval_macro_f1": 0.7100297932545989, "eval_macro_precision": 0.8731926397728601, "eval_macro_recall": 0.6387457691830838, "eval_micro_f1": 0.837847344354555, "eval_micro_precision": 0.8981900452488688, "eval_micro_recall": 0.7851021753460777, "eval_runtime": 2.8204, "eval_samples_per_second": 342.505, "eval_steps_per_second": 21.628, "step": 1136 }, { "epoch": 18.0, "eval_loss": 0.10116878896951675, "eval_macro_f1": 0.7102542897490418, "eval_macro_precision": 0.8708979085725551, "eval_macro_recall": 0.6278706438272531, "eval_micro_f1": 0.8369795342272407, "eval_micro_precision": 0.9005315110098709, "eval_micro_recall": 0.7818061964403428, "eval_runtime": 2.821, "eval_samples_per_second": 342.431, "eval_steps_per_second": 21.624, "step": 1278 }, { "epoch": 20.0, "eval_loss": 0.10277832299470901, "eval_macro_f1": 0.7155479835258449, "eval_macro_precision": 0.8815899776220457, "eval_macro_recall": 0.635461059766739, "eval_micro_f1": 0.8361702127659574, "eval_micro_precision": 0.9048349961627015, "eval_micro_recall": 0.7771918259723137, "eval_runtime": 2.8201, "eval_samples_per_second": 342.543, "eval_steps_per_second": 21.631, "step": 1420 }, { "epoch": 21.13, "learning_rate": 2.431957466512859e-05, "loss": 0.0147, "step": 1500 }, { "epoch": 22.0, "eval_loss": 0.10351855307817459, "eval_macro_f1": 0.7203873361325647, "eval_macro_precision": 0.87817690487536, "eval_macro_recall": 0.6443209986334517, "eval_micro_f1": 0.837389770723104, "eval_micro_precision": 0.9006069802731411, "eval_micro_recall": 0.7824653922214898, "eval_runtime": 2.8203, "eval_samples_per_second": 342.52, "eval_steps_per_second": 21.629, "step": 1562 }, { "epoch": 24.0, "eval_loss": 0.10557578504085541, "eval_macro_f1": 0.7298035823714943, "eval_macro_precision": 0.8804692132123697, "eval_macro_recall": 0.6544575680820387, "eval_micro_f1": 0.8399153737658673, "eval_micro_precision": 0.9029567854435178, "eval_micro_recall": 0.7851021753460777, "eval_runtime": 2.8192, "eval_samples_per_second": 342.647, "eval_steps_per_second": 21.637, "step": 1704 }, { "epoch": 26.0, "eval_loss": 0.10698197782039642, "eval_macro_f1": 0.719671126038702, "eval_macro_precision": 0.8799359996888543, "eval_macro_recall": 0.6422398711062759, "eval_micro_f1": 0.8375706214689265, "eval_micro_precision": 0.9019011406844106, "eval_micro_recall": 0.7818061964403428, "eval_runtime": 2.8198, "eval_samples_per_second": 342.58, "eval_steps_per_second": 21.633, "step": 1846 }, { "epoch": 28.0, "eval_loss": 0.10863872617483139, "eval_macro_f1": 0.7318024399797272, "eval_macro_precision": 0.8781101314061108, "eval_macro_recall": 0.6548756753680312, "eval_micro_f1": 0.8389143461402891, "eval_micro_precision": 0.9015151515151515, "eval_micro_recall": 0.7844429795649308, "eval_runtime": 2.8198, "eval_samples_per_second": 342.583, "eval_steps_per_second": 21.633, "step": 1988 }, { "epoch": 28.17, "learning_rate": 2.214818407002782e-05, "loss": 0.0082, "step": 2000 }, { "epoch": 30.0, "eval_loss": 0.1103406548500061, "eval_macro_f1": 0.728656455732704, "eval_macro_precision": 0.8784457053881349, "eval_macro_recall": 0.6552278977528909, "eval_micro_f1": 0.8400702987697715, "eval_micro_precision": 0.8998493975903614, "eval_micro_recall": 0.7877389584706658, "eval_runtime": 2.8227, "eval_samples_per_second": 342.231, "eval_steps_per_second": 21.611, "step": 2130 }, { "epoch": 32.0, "eval_loss": 0.11085448414087296, "eval_macro_f1": 0.7344651790603305, "eval_macro_precision": 0.8647882525531505, "eval_macro_recall": 0.6579821769650391, "eval_micro_f1": 0.8402116402116402, "eval_micro_precision": 0.9036418816388467, "eval_micro_recall": 0.7851021753460777, "eval_runtime": 2.8204, "eval_samples_per_second": 342.508, "eval_steps_per_second": 21.628, "step": 2272 }, { "epoch": 34.0, "eval_loss": 0.11268670856952667, "eval_macro_f1": 0.7371526225457369, "eval_macro_precision": 0.8672638823695995, "eval_macro_recall": 0.6591834104386446, "eval_micro_f1": 0.8397323001056711, "eval_micro_precision": 0.9016641452344932, "eval_micro_recall": 0.7857613711272248, "eval_runtime": 2.8203, "eval_samples_per_second": 342.515, "eval_steps_per_second": 21.629, "step": 2414 }, { "epoch": 35.21, "learning_rate": 1.9976793474927056e-05, "loss": 0.0056, "step": 2500 }, { "epoch": 36.0, "eval_loss": 0.11449939757585526, "eval_macro_f1": 0.7435877211427984, "eval_macro_precision": 0.8803719443456752, "eval_macro_recall": 0.6671207777218493, "eval_micro_f1": 0.8435852372583479, "eval_micro_precision": 0.9036144578313253, "eval_micro_recall": 0.7910349373764007, "eval_runtime": 2.8219, "eval_samples_per_second": 342.324, "eval_steps_per_second": 21.617, "step": 2556 }, { "epoch": 38.0, "eval_loss": 0.1153542622923851, "eval_macro_f1": 0.7450728657264144, "eval_macro_precision": 0.879277762192774, "eval_macro_recall": 0.6681699293595025, "eval_micro_f1": 0.8419570573741639, "eval_micro_precision": 0.9033232628398792, "eval_micro_recall": 0.7883981542518128, "eval_runtime": 2.8191, "eval_samples_per_second": 342.661, "eval_steps_per_second": 21.638, "step": 2698 }, { "epoch": 40.0, "eval_loss": 0.11756419390439987, "eval_macro_f1": 0.744466331732378, "eval_macro_precision": 0.8796020530253235, "eval_macro_recall": 0.6677481100215579, "eval_micro_f1": 0.8419570573741639, "eval_micro_precision": 0.9033232628398792, "eval_micro_recall": 0.7883981542518128, "eval_runtime": 2.8181, "eval_samples_per_second": 342.785, "eval_steps_per_second": 21.646, "step": 2840 }, { "epoch": 42.0, "eval_loss": 0.11846820265054703, "eval_macro_f1": 0.7434279482403103, "eval_macro_precision": 0.8813168271357439, "eval_macro_recall": 0.6651456053129234, "eval_micro_f1": 0.8427717200140696, "eval_micro_precision": 0.9034690799396682, "eval_micro_recall": 0.7897165458141068, "eval_runtime": 2.8203, "eval_samples_per_second": 342.52, "eval_steps_per_second": 21.629, "step": 2982 }, { "epoch": 42.25, "learning_rate": 1.7805402879826288e-05, "loss": 0.004, "step": 3000 }, { "epoch": 44.0, "eval_loss": 0.11996057629585266, "eval_macro_f1": 0.7394753977857482, "eval_macro_precision": 0.8783969916238008, "eval_macro_recall": 0.6650662737686901, "eval_micro_f1": 0.8427717200140696, "eval_micro_precision": 0.9034690799396682, "eval_micro_recall": 0.7897165458141068, "eval_runtime": 2.82, "eval_samples_per_second": 342.552, "eval_steps_per_second": 21.631, "step": 3124 }, { "epoch": 46.0, "eval_loss": 0.12046220153570175, "eval_macro_f1": 0.7423287819151947, "eval_macro_precision": 0.8810998402784964, "eval_macro_recall": 0.6639465302563645, "eval_micro_f1": 0.8432546671363156, "eval_micro_precision": 0.905446293494705, "eval_micro_recall": 0.7890573500329597, "eval_runtime": 2.819, "eval_samples_per_second": 342.677, "eval_steps_per_second": 21.639, "step": 3266 }, { "epoch": 48.0, "eval_loss": 0.12299305200576782, "eval_macro_f1": 0.7584256567760465, "eval_macro_precision": 0.9421857420436691, "eval_macro_recall": 0.6745195368122752, "eval_micro_f1": 0.8419570573741639, "eval_micro_precision": 0.9033232628398792, "eval_micro_recall": 0.7883981542518128, "eval_runtime": 2.8184, "eval_samples_per_second": 342.744, "eval_steps_per_second": 21.643, "step": 3408 }, { "epoch": 49.3, "learning_rate": 1.563401228472552e-05, "loss": 0.0031, "step": 3500 }, { "epoch": 50.0, "eval_loss": 0.12287621200084686, "eval_macro_f1": 0.7672541719320296, "eval_macro_precision": 0.9303028445349943, "eval_macro_recall": 0.6844658294965831, "eval_micro_f1": 0.8449122807017543, "eval_micro_precision": 0.9032258064516129, "eval_micro_recall": 0.7936717205009888, "eval_runtime": 2.8187, "eval_samples_per_second": 342.711, "eval_steps_per_second": 21.641, "step": 3550 } ], "max_steps": 7100, "num_train_epochs": 100, "total_flos": 3.007255197834797e+16, "trial_name": null, "trial_params": { "adam_epsilon": 6.447418463180699e-08, "learning_rate": 3.0290898801655698e-05, "per_device_eval_batch_size": 16, "per_device_train_batch_size": 32, "seed": 320, "warmup_steps": 125, "weight_decay": 4.5126980713116176e-08 } }