{ "best_metric": 0.9743472495313, "best_model_checkpoint": "models/pos_final_mono_fr/checkpoint-560", "epoch": 39.94915254237288, "global_step": 560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.95, "eval_accuracy": 0.021484027736620635, "eval_f1": 0.02014146604497732, "eval_loss": 3.6696622371673584, "eval_precision": 0.02098896013750248, "eval_recall": 0.019359756097560975, "eval_runtime": 1.991, "eval_samples_per_second": 833.259, "eval_steps_per_second": 3.516, "step": 14 }, { "epoch": 1.95, "eval_accuracy": 0.051117169442304274, "eval_f1": 0.049818939975858664, "eval_loss": 3.6328794956207275, "eval_precision": 0.051272950211351684, "eval_recall": 0.048445121951219514, "eval_runtime": 1.9795, "eval_samples_per_second": 838.108, "eval_steps_per_second": 3.536, "step": 28 }, { "epoch": 2.95, "eval_accuracy": 0.12674094707520892, "eval_f1": 0.11132299612354632, "eval_loss": 3.5738770961761475, "eval_precision": 0.11422247882986913, "eval_recall": 0.1085670731707317, "eval_runtime": 2.9806, "eval_samples_per_second": 556.603, "eval_steps_per_second": 2.349, "step": 42 }, { "epoch": 3.95, "eval_accuracy": 0.30611035381971197, "eval_f1": 0.22206610578982539, "eval_loss": 3.4791259765625, "eval_precision": 0.2535112084816713, "eval_recall": 0.1975609756097561, "eval_runtime": 1.9318, "eval_samples_per_second": 858.799, "eval_steps_per_second": 3.624, "step": 56 }, { "epoch": 4.95, "eval_accuracy": 0.3788300835654596, "eval_f1": 0.25393978707978787, "eval_loss": 3.3377487659454346, "eval_precision": 0.339298460283471, "eval_recall": 0.20289634146341465, "eval_runtime": 1.9526, "eval_samples_per_second": 849.649, "eval_steps_per_second": 3.585, "step": 70 }, { "epoch": 5.95, "eval_accuracy": 0.34273691696793696, "eval_f1": 0.20375216215017516, "eval_loss": 3.188615560531616, "eval_precision": 0.3736782170164308, "eval_recall": 0.1400609756097561, "eval_runtime": 1.9288, "eval_samples_per_second": 860.111, "eval_steps_per_second": 3.629, "step": 84 }, { "epoch": 6.95, "eval_accuracy": 0.4599656255556214, "eval_f1": 0.36921620863712845, "eval_loss": 3.0504870414733887, "eval_precision": 0.434243073878628, "eval_recall": 0.3211280487804878, "eval_runtime": 1.9835, "eval_samples_per_second": 836.412, "eval_steps_per_second": 3.529, "step": 98 }, { "epoch": 7.95, "eval_accuracy": 0.5282403840455165, "eval_f1": 0.4701918608510921, "eval_loss": 2.8996212482452393, "eval_precision": 0.5159915488853272, "eval_recall": 0.431859756097561, "eval_runtime": 2.2513, "eval_samples_per_second": 736.924, "eval_steps_per_second": 3.109, "step": 112 }, { "epoch": 8.95, "eval_accuracy": 0.573164226871333, "eval_f1": 0.5221832422289304, "eval_loss": 2.748504161834717, "eval_precision": 0.5617342460944357, "eval_recall": 0.48783536585365855, "eval_runtime": 1.9301, "eval_samples_per_second": 859.535, "eval_steps_per_second": 3.627, "step": 126 }, { "epoch": 9.95, "eval_accuracy": 0.6246369940141053, "eval_f1": 0.5703931402685649, "eval_loss": 2.586193084716797, "eval_precision": 0.6076525336091003, "eval_recall": 0.5374390243902439, "eval_runtime": 2.1049, "eval_samples_per_second": 788.176, "eval_steps_per_second": 3.326, "step": 140 }, { "epoch": 10.95, "eval_accuracy": 0.6886742132400877, "eval_f1": 0.6548560582094275, "eval_loss": 2.420483112335205, "eval_precision": 0.6804733727810651, "eval_recall": 0.6310975609756098, "eval_runtime": 2.1866, "eval_samples_per_second": 758.715, "eval_steps_per_second": 3.201, "step": 154 }, { "epoch": 11.95, "eval_accuracy": 0.7838854975404492, "eval_f1": 0.7690782646407386, "eval_loss": 2.260331869125366, "eval_precision": 0.7816327173125964, "eval_recall": 0.7569207317073171, "eval_runtime": 2.2785, "eval_samples_per_second": 728.116, "eval_steps_per_second": 3.072, "step": 168 }, { "epoch": 12.95, "eval_accuracy": 0.83701772061874, "eval_f1": 0.8335373317013463, "eval_loss": 2.1123812198638916, "eval_precision": 0.8366093366093366, "eval_recall": 0.8304878048780487, "eval_runtime": 2.9322, "eval_samples_per_second": 565.792, "eval_steps_per_second": 2.387, "step": 182 }, { "epoch": 13.95, "eval_accuracy": 0.8735553843418479, "eval_f1": 0.8685824105426924, "eval_loss": 1.9825972318649292, "eval_precision": 0.8690861363775105, "eval_recall": 0.8680792682926829, "eval_runtime": 1.9874, "eval_samples_per_second": 834.748, "eval_steps_per_second": 3.522, "step": 196 }, { "epoch": 14.95, "eval_accuracy": 0.9239613583832158, "eval_f1": 0.9204770765335692, "eval_loss": 1.8721418380737305, "eval_precision": 0.9209546481108466, "eval_recall": 0.92, "eval_runtime": 1.9558, "eval_samples_per_second": 848.258, "eval_steps_per_second": 3.579, "step": 210 }, { "epoch": 15.95, "eval_accuracy": 0.9416819771232146, "eval_f1": 0.9391052511241521, "eval_loss": 1.7779291868209839, "eval_precision": 0.9390336838896509, "eval_recall": 0.9391768292682927, "eval_runtime": 1.9975, "eval_samples_per_second": 830.55, "eval_steps_per_second": 3.504, "step": 224 }, { "epoch": 16.95, "eval_accuracy": 0.9466010786463581, "eval_f1": 0.944723771216138, "eval_loss": 1.698561429977417, "eval_precision": 0.9442346348297497, "eval_recall": 0.9452134146341463, "eval_runtime": 1.9516, "eval_samples_per_second": 850.075, "eval_steps_per_second": 3.587, "step": 238 }, { "epoch": 17.95, "eval_accuracy": 0.9486161322823445, "eval_f1": 0.9471599219892736, "eval_loss": 1.6294448375701904, "eval_precision": 0.9466983430799221, "eval_recall": 0.9476219512195122, "eval_runtime": 1.9621, "eval_samples_per_second": 845.505, "eval_steps_per_second": 3.568, "step": 252 }, { "epoch": 18.95, "eval_accuracy": 0.9498607242339833, "eval_f1": 0.9487042764210301, "eval_loss": 1.5666829347610474, "eval_precision": 0.948140929991778, "eval_recall": 0.9492682926829268, "eval_runtime": 2.1244, "eval_samples_per_second": 780.914, "eval_steps_per_second": 3.295, "step": 266 }, { "epoch": 19.95, "eval_accuracy": 0.9523499081372607, "eval_f1": 0.9514249592542383, "eval_loss": 1.5073306560516357, "eval_precision": 0.9506864326808925, "eval_recall": 0.9521646341463414, "eval_runtime": 1.9609, "eval_samples_per_second": 846.058, "eval_steps_per_second": 3.57, "step": 280 }, { "epoch": 20.95, "eval_accuracy": 0.9551650565993006, "eval_f1": 0.9544048870405071, "eval_loss": 1.4499109983444214, "eval_precision": 0.953780105349694, "eval_recall": 0.9550304878048781, "eval_runtime": 2.9847, "eval_samples_per_second": 555.827, "eval_steps_per_second": 2.345, "step": 294 }, { "epoch": 21.95, "eval_accuracy": 0.9562911159841166, "eval_f1": 0.9558884029925795, "eval_loss": 1.3926490545272827, "eval_precision": 0.9554662036613969, "eval_recall": 0.9563109756097561, "eval_runtime": 2.0934, "eval_samples_per_second": 792.487, "eval_steps_per_second": 3.344, "step": 308 }, { "epoch": 22.95, "eval_accuracy": 0.9612102175072601, "eval_f1": 0.961168007802975, "eval_loss": 1.3373351097106934, "eval_precision": 0.9609336908824964, "eval_recall": 0.9614024390243903, "eval_runtime": 2.5845, "eval_samples_per_second": 641.892, "eval_steps_per_second": 2.708, "step": 322 }, { "epoch": 23.95, "eval_accuracy": 0.9622770106086648, "eval_f1": 0.9623058515097475, "eval_loss": 1.2815097570419312, "eval_precision": 0.962203188343951, "eval_recall": 0.9624085365853658, "eval_runtime": 1.9491, "eval_samples_per_second": 851.178, "eval_steps_per_second": 3.591, "step": 336 }, { "epoch": 24.95, "eval_accuracy": 0.9646180288034137, "eval_f1": 0.9648307087214354, "eval_loss": 1.2245593070983887, "eval_precision": 0.9648748361130591, "eval_recall": 0.9647865853658537, "eval_runtime": 2.9881, "eval_samples_per_second": 555.195, "eval_steps_per_second": 2.343, "step": 350 }, { "epoch": 25.95, "eval_accuracy": 0.9647661945119421, "eval_f1": 0.9652270683110508, "eval_loss": 1.16820228099823, "eval_precision": 0.9652712138305333, "eval_recall": 0.9651829268292683, "eval_runtime": 2.9793, "eval_samples_per_second": 556.84, "eval_steps_per_second": 2.35, "step": 364 }, { "epoch": 26.95, "eval_accuracy": 0.9660996858886979, "eval_f1": 0.9654436860068261, "eval_loss": 1.1113808155059814, "eval_precision": 0.9649731968810916, "eval_recall": 0.9659146341463415, "eval_runtime": 2.0371, "eval_samples_per_second": 814.377, "eval_steps_per_second": 3.436, "step": 378 }, { "epoch": 27.95, "eval_accuracy": 0.9699223611687311, "eval_f1": 0.9672198601014949, "eval_loss": 1.0521485805511475, "eval_precision": 0.9669398823852037, "eval_recall": 0.9675, "eval_runtime": 2.1723, "eval_samples_per_second": 763.722, "eval_steps_per_second": 3.222, "step": 392 }, { "epoch": 28.95, "eval_accuracy": 0.9706928228530789, "eval_f1": 0.9678383074718775, "eval_loss": 0.9949794411659241, "eval_precision": 0.9677498018655124, "eval_recall": 0.9679268292682927, "eval_runtime": 2.9777, "eval_samples_per_second": 557.141, "eval_steps_per_second": 2.351, "step": 406 }, { "epoch": 29.95, "eval_accuracy": 0.9716410833876608, "eval_f1": 0.9688157293095565, "eval_loss": 0.9363731741905212, "eval_precision": 0.968668088997257, "eval_recall": 0.9689634146341464, "eval_runtime": 2.0063, "eval_samples_per_second": 826.911, "eval_steps_per_second": 3.489, "step": 420 }, { "epoch": 30.95, "eval_accuracy": 0.9720559473715403, "eval_f1": 0.9692110718205097, "eval_loss": 0.8799633383750916, "eval_precision": 0.9690929041697147, "eval_recall": 0.969329268292683, "eval_runtime": 1.9828, "eval_samples_per_second": 836.683, "eval_steps_per_second": 3.53, "step": 434 }, { "epoch": 31.95, "eval_accuracy": 0.9726189770639483, "eval_f1": 0.9695962936434156, "eval_loss": 0.8233166337013245, "eval_precision": 0.9693451564737788, "eval_recall": 0.9698475609756098, "eval_runtime": 1.9361, "eval_samples_per_second": 856.891, "eval_steps_per_second": 3.616, "step": 448 }, { "epoch": 32.95, "eval_accuracy": 0.9733301724648847, "eval_f1": 0.9703057833602635, "eval_loss": 0.7679479122161865, "eval_precision": 0.9702762026705689, "eval_recall": 0.9703353658536585, "eval_runtime": 1.9498, "eval_samples_per_second": 850.85, "eval_steps_per_second": 3.59, "step": 462 }, { "epoch": 33.95, "eval_accuracy": 0.9737450364487643, "eval_f1": 0.9711132452249204, "eval_loss": 0.7146441340446472, "eval_precision": 0.9710984421206671, "eval_recall": 0.9711280487804878, "eval_runtime": 1.9736, "eval_samples_per_second": 840.614, "eval_steps_per_second": 3.547, "step": 476 }, { "epoch": 34.95, "eval_accuracy": 0.9749599952586974, "eval_f1": 0.9722899646384587, "eval_loss": 0.6641064286231995, "eval_precision": 0.9721714216044867, "eval_recall": 0.9724085365853659, "eval_runtime": 1.9368, "eval_samples_per_second": 856.569, "eval_steps_per_second": 3.614, "step": 490 }, { "epoch": 35.68, "learning_rate": 5e-05, "loss": 2.0937, "step": 500 }, { "epoch": 35.95, "eval_accuracy": 0.9755230249511053, "eval_f1": 0.9729268292682927, "eval_loss": 0.6186906099319458, "eval_precision": 0.9729268292682927, "eval_recall": 0.9729268292682927, "eval_runtime": 1.993, "eval_samples_per_second": 832.399, "eval_steps_per_second": 3.512, "step": 504 }, { "epoch": 36.95, "eval_accuracy": 0.9756119243762224, "eval_f1": 0.9729490833168234, "eval_loss": 0.5833659172058105, "eval_precision": 0.9726970777341012, "eval_recall": 0.9732012195121951, "eval_runtime": 2.9665, "eval_samples_per_second": 559.252, "eval_steps_per_second": 2.36, "step": 518 }, { "epoch": 37.95, "eval_accuracy": 0.9761749540686303, "eval_f1": 0.9736946383393786, "eval_loss": 0.5605461597442627, "eval_precision": 0.97348692631194, "eval_recall": 0.9739024390243902, "eval_runtime": 2.0134, "eval_samples_per_second": 823.972, "eval_steps_per_second": 3.477, "step": 532 }, { "epoch": 38.95, "eval_accuracy": 0.9764712854856872, "eval_f1": 0.9739244403127238, "eval_loss": 0.5465701222419739, "eval_precision": 0.9736721821007405, "eval_recall": 0.9741768292682926, "eval_runtime": 2.9641, "eval_samples_per_second": 559.689, "eval_steps_per_second": 2.362, "step": 546 }, { "epoch": 39.95, "eval_accuracy": 0.9768268831861554, "eval_f1": 0.9743472495313, "eval_loss": 0.5415592193603516, "eval_precision": 0.974243301734386, "eval_recall": 0.9744512195121952, "eval_runtime": 1.9918, "eval_samples_per_second": 832.894, "eval_steps_per_second": 3.514, "step": 560 }, { "epoch": 39.95, "step": 560, "total_flos": 3.723450094214784e+16, "train_loss": 1.9360494545527867, "train_runtime": 526.0611, "train_samples_per_second": 1135.077, "train_steps_per_second": 1.065 } ], "max_steps": 560, "num_train_epochs": 40, "total_flos": 3.723450094214784e+16, "trial_name": null, "trial_params": null }