{ "best_metric": 0.9895929814239887, "best_model_checkpoint": "models/pos_final_mono_de/checkpoint-4224", "epoch": 39.994174757281556, "global_step": 5120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "eval_accuracy": 0.9475099341812547, "eval_f1": 0.9428053278974075, "eval_loss": 0.235727921128273, "eval_precision": 0.9442734211134948, "eval_recall": 0.941341792581462, "eval_runtime": 18.99, "eval_samples_per_second": 771.406, "eval_steps_per_second": 3.054, "step": 128 }, { "epoch": 1.99, "eval_accuracy": 0.9852943432700717, "eval_f1": 0.9842332493182053, "eval_loss": 0.05128009244799614, "eval_precision": 0.9842997713944935, "eval_recall": 0.9841667362328519, "eval_runtime": 19.2922, "eval_samples_per_second": 759.323, "eval_steps_per_second": 3.006, "step": 256 }, { "epoch": 2.99, "eval_accuracy": 0.9875228217677473, "eval_f1": 0.9867158568898448, "eval_loss": 0.04063262417912483, "eval_precision": 0.9867884320258268, "eval_recall": 0.9866432924284164, "eval_runtime": 19.7655, "eval_samples_per_second": 741.14, "eval_steps_per_second": 2.934, "step": 384 }, { "epoch": 3.9, "learning_rate": 5e-05, "loss": 0.6822, "step": 500 }, { "epoch": 3.99, "eval_accuracy": 0.9884893907546909, "eval_f1": 0.9876851402812782, "eval_loss": 0.036450713872909546, "eval_precision": 0.9876576580157648, "eval_recall": 0.9877126240762605, "eval_runtime": 18.3474, "eval_samples_per_second": 798.424, "eval_steps_per_second": 3.161, "step": 512 }, { "epoch": 4.99, "eval_accuracy": 0.9889918532042529, "eval_f1": 0.9881764176274528, "eval_loss": 0.03515882417559624, "eval_precision": 0.9881194651573207, "eval_recall": 0.9882333766631287, "eval_runtime": 19.0555, "eval_samples_per_second": 768.756, "eval_steps_per_second": 3.044, "step": 640 }, { "epoch": 5.99, "eval_accuracy": 0.9894521241504165, "eval_f1": 0.9887225068869429, "eval_loss": 0.0344870425760746, "eval_precision": 0.9887067858661908, "eval_recall": 0.9887382284076499, "eval_runtime": 18.7765, "eval_samples_per_second": 780.176, "eval_steps_per_second": 3.089, "step": 768 }, { "epoch": 6.99, "eval_accuracy": 0.9896017122079197, "eval_f1": 0.9887940595397575, "eval_loss": 0.03525426983833313, "eval_precision": 0.9887783373812941, "eval_recall": 0.9888097821982119, "eval_runtime": 18.226, "eval_samples_per_second": 803.744, "eval_steps_per_second": 3.182, "step": 896 }, { "epoch": 7.81, "learning_rate": 4.458874458874459e-05, "loss": 0.024, "step": 1000 }, { "epoch": 7.99, "eval_accuracy": 0.9894866444713788, "eval_f1": 0.9887030802192603, "eval_loss": 0.037094976752996445, "eval_precision": 0.9886480621017779, "eval_recall": 0.9887581044605838, "eval_runtime": 18.998, "eval_samples_per_second": 771.08, "eval_steps_per_second": 3.053, "step": 1024 }, { "epoch": 8.99, "eval_accuracy": 0.9895556851133034, "eval_f1": 0.988807816838561, "eval_loss": 0.03866518661379814, "eval_precision": 0.9888058514867228, "eval_recall": 0.9888097821982119, "eval_runtime": 19.63, "eval_samples_per_second": 746.255, "eval_steps_per_second": 2.955, "step": 1152 }, { "epoch": 9.99, "eval_accuracy": 0.9897513002654229, "eval_f1": 0.9889666056460926, "eval_loss": 0.04022372514009476, "eval_precision": 0.9889842973563904, "eval_recall": 0.9889489145687492, "eval_runtime": 17.8198, "eval_samples_per_second": 822.064, "eval_steps_per_second": 3.255, "step": 1280 }, { "epoch": 10.99, "eval_accuracy": 0.9897282867181147, "eval_f1": 0.9889296106084937, "eval_loss": 0.04293292760848999, "eval_precision": 0.9888785102450464, "eval_recall": 0.9889807162534435, "eval_runtime": 18.5105, "eval_samples_per_second": 791.389, "eval_steps_per_second": 3.133, "step": 1408 }, { "epoch": 11.71, "learning_rate": 3.917748917748918e-05, "loss": 0.0128, "step": 1500 }, { "epoch": 11.99, "eval_accuracy": 0.989628561346446, "eval_f1": 0.9888770954828604, "eval_loss": 0.045427996665239334, "eval_precision": 0.9889006825762183, "eval_recall": 0.9888535095146666, "eval_runtime": 18.3742, "eval_samples_per_second": 797.257, "eval_steps_per_second": 3.157, "step": 1536 }, { "epoch": 12.99, "eval_accuracy": 0.9896899308059345, "eval_f1": 0.9889251953792704, "eval_loss": 0.04608777165412903, "eval_precision": 0.9889134021028363, "eval_recall": 0.9889369889369889, "eval_runtime": 18.3253, "eval_samples_per_second": 799.387, "eval_steps_per_second": 3.165, "step": 1664 }, { "epoch": 13.99, "eval_accuracy": 0.989889381549272, "eval_f1": 0.9891329626839416, "eval_loss": 0.04769197106361389, "eval_precision": 0.9892057156034064, "eval_recall": 0.9890602204651792, "eval_runtime": 18.8558, "eval_samples_per_second": 776.896, "eval_steps_per_second": 3.076, "step": 1792 }, { "epoch": 14.99, "eval_accuracy": 0.9897743138127311, "eval_f1": 0.9890247489724366, "eval_loss": 0.0506986528635025, "eval_precision": 0.9889972294324113, "eval_recall": 0.9890522700440055, "eval_runtime": 17.9307, "eval_samples_per_second": 816.978, "eval_steps_per_second": 3.235, "step": 1920 }, { "epoch": 15.62, "learning_rate": 3.376623376623377e-05, "loss": 0.0069, "step": 2000 }, { "epoch": 15.99, "eval_accuracy": 0.9900581475628654, "eval_f1": 0.9893163454944793, "eval_loss": 0.05137912556529045, "eval_precision": 0.9893419096308429, "eval_recall": 0.9892907826792124, "eval_runtime": 18.9861, "eval_samples_per_second": 771.564, "eval_steps_per_second": 3.055, "step": 2048 }, { "epoch": 16.99, "eval_accuracy": 0.989889381549272, "eval_f1": 0.989197257872486, "eval_loss": 0.053016748279333115, "eval_precision": 0.9892070887364145, "eval_recall": 0.9891874272039561, "eval_runtime": 18.1987, "eval_samples_per_second": 804.946, "eval_steps_per_second": 3.187, "step": 2176 }, { "epoch": 17.99, "eval_accuracy": 0.9898203409073475, "eval_f1": 0.9890543664272952, "eval_loss": 0.05524001270532608, "eval_precision": 0.9890445373741871, "eval_recall": 0.989064195675766, "eval_runtime": 18.53, "eval_samples_per_second": 790.554, "eval_steps_per_second": 3.13, "step": 2304 }, { "epoch": 18.99, "eval_accuracy": 0.9898395188634376, "eval_f1": 0.9891659296212747, "eval_loss": 0.0566512756049633, "eval_precision": 0.9891325086653735, "eval_recall": 0.9891993528357165, "eval_runtime": 19.0959, "eval_samples_per_second": 767.129, "eval_steps_per_second": 3.037, "step": 2432 }, { "epoch": 19.53, "learning_rate": 2.8354978354978357e-05, "loss": 0.0037, "step": 2500 }, { "epoch": 19.99, "eval_accuracy": 0.9899507510087605, "eval_f1": 0.989249406222982, "eval_loss": 0.057712409645318985, "eval_precision": 0.9892159824466563, "eval_recall": 0.9892828322580389, "eval_runtime": 18.1495, "eval_samples_per_second": 807.13, "eval_steps_per_second": 3.196, "step": 2560 }, { "epoch": 20.99, "eval_accuracy": 0.989897052731708, "eval_f1": 0.9892537230374182, "eval_loss": 0.05920035019516945, "eval_precision": 0.9891888454322872, "eval_recall": 0.9893186091533199, "eval_runtime": 18.5483, "eval_samples_per_second": 789.775, "eval_steps_per_second": 3.127, "step": 2688 }, { "epoch": 21.99, "eval_accuracy": 0.9899584221911966, "eval_f1": 0.9892630842496084, "eval_loss": 0.06059529632329941, "eval_precision": 0.9892512869437322, "eval_recall": 0.9892748818368653, "eval_runtime": 18.2219, "eval_samples_per_second": 803.923, "eval_steps_per_second": 3.183, "step": 2816 }, { "epoch": 22.99, "eval_accuracy": 0.9899699289648506, "eval_f1": 0.9892710345759693, "eval_loss": 0.06275586783885956, "eval_precision": 0.9892592371752827, "eval_recall": 0.9892828322580389, "eval_runtime": 18.6724, "eval_samples_per_second": 784.529, "eval_steps_per_second": 3.106, "step": 2944 }, { "epoch": 23.43, "learning_rate": 2.2943722943722946e-05, "loss": 0.0023, "step": 3000 }, { "epoch": 23.99, "eval_accuracy": 0.9899162306877982, "eval_f1": 0.9891494254701287, "eval_loss": 0.06293565034866333, "eval_precision": 0.9891710528408098, "eval_recall": 0.9891277990451545, "eval_runtime": 18.198, "eval_samples_per_second": 804.98, "eval_steps_per_second": 3.187, "step": 3072 }, { "epoch": 24.99, "eval_accuracy": 0.9899776001472868, "eval_f1": 0.9892692816043408, "eval_loss": 0.06246413290500641, "eval_precision": 0.9892358571564855, "eval_recall": 0.9893027083109728, "eval_runtime": 18.2292, "eval_samples_per_second": 803.601, "eval_steps_per_second": 3.182, "step": 3200 }, { "epoch": 25.99, "eval_accuracy": 0.990008284877031, "eval_f1": 0.9893007845031315, "eval_loss": 0.06362640857696533, "eval_precision": 0.9892948855550521, "eval_recall": 0.9893066835215596, "eval_runtime": 19.3067, "eval_samples_per_second": 758.751, "eval_steps_per_second": 3.004, "step": 3328 }, { "epoch": 26.99, "eval_accuracy": 0.9900926678838277, "eval_f1": 0.9893981976538494, "eval_loss": 0.0649913027882576, "eval_precision": 0.9893903316465458, "eval_recall": 0.9894060637862291, "eval_runtime": 18.4146, "eval_samples_per_second": 795.511, "eval_steps_per_second": 3.15, "step": 3456 }, { "epoch": 27.34, "learning_rate": 1.7532467532467535e-05, "loss": 0.0017, "step": 3500 }, { "epoch": 27.99, "eval_accuracy": 0.9901003390662637, "eval_f1": 0.989384347826087, "eval_loss": 0.0644073411822319, "eval_precision": 0.9893705826701542, "eval_recall": 0.9893981133650556, "eval_runtime": 18.6787, "eval_samples_per_second": 784.263, "eval_steps_per_second": 3.105, "step": 3584 }, { "epoch": 28.99, "eval_accuracy": 0.9901425305696621, "eval_f1": 0.9894557748763214, "eval_loss": 0.06558605283498764, "eval_precision": 0.9894538082366036, "eval_recall": 0.9894577415238572, "eval_runtime": 18.1086, "eval_samples_per_second": 808.954, "eval_steps_per_second": 3.203, "step": 3712 }, { "epoch": 29.99, "eval_accuracy": 0.9901502017520981, "eval_f1": 0.9894956104173334, "eval_loss": 0.0667632669210434, "eval_precision": 0.989485776979218, "eval_recall": 0.9895054440508986, "eval_runtime": 18.5261, "eval_samples_per_second": 790.723, "eval_steps_per_second": 3.131, "step": 3840 }, { "epoch": 30.99, "eval_accuracy": 0.9901003390662637, "eval_f1": 0.9894474469341146, "eval_loss": 0.06663960218429565, "eval_precision": 0.9894808819203155, "eval_recall": 0.9894140142074026, "eval_runtime": 18.0695, "eval_samples_per_second": 810.702, "eval_steps_per_second": 3.21, "step": 3968 }, { "epoch": 31.25, "learning_rate": 1.2121212121212122e-05, "loss": 0.0011, "step": 4000 }, { "epoch": 31.99, "eval_accuracy": 0.9900466407892112, "eval_f1": 0.9893740508996081, "eval_loss": 0.06780469417572021, "eval_precision": 0.9893937165323654, "eval_recall": 0.9893543860486009, "eval_runtime": 18.1642, "eval_samples_per_second": 806.478, "eval_steps_per_second": 3.193, "step": 4096 }, { "epoch": 32.99, "eval_accuracy": 0.9902230779852407, "eval_f1": 0.9895929814239887, "eval_loss": 0.06849976629018784, "eval_precision": 0.9895851138680967, "eval_recall": 0.9896008491049814, "eval_runtime": 18.9151, "eval_samples_per_second": 774.46, "eval_steps_per_second": 3.066, "step": 4224 }, { "epoch": 33.99, "eval_accuracy": 0.99014636616088, "eval_f1": 0.9894398320867711, "eval_loss": 0.06920044124126434, "eval_precision": 0.9894417987104366, "eval_recall": 0.9894378654709233, "eval_runtime": 18.3423, "eval_samples_per_second": 798.645, "eval_steps_per_second": 3.162, "step": 4352 }, { "epoch": 34.99, "eval_accuracy": 0.9902000644379325, "eval_f1": 0.9895391709648887, "eval_loss": 0.06976373493671417, "eval_precision": 0.9895450714751387, "eval_recall": 0.9895332705250061, "eval_runtime": 18.8004, "eval_samples_per_second": 779.185, "eval_steps_per_second": 3.085, "step": 4480 }, { "epoch": 35.16, "learning_rate": 6.709956709956711e-06, "loss": 0.0009, "step": 4500 }, { "epoch": 35.99, "eval_accuracy": 0.9900658187453014, "eval_f1": 0.9893825501754999, "eval_loss": 0.06981877237558365, "eval_precision": 0.9893510881446884, "eval_recall": 0.9894140142074026, "eval_runtime": 18.1896, "eval_samples_per_second": 805.351, "eval_steps_per_second": 3.189, "step": 4608 }, { "epoch": 36.99, "eval_accuracy": 0.9902039000291505, "eval_f1": 0.9894797097330076, "eval_loss": 0.0695314109325409, "eval_precision": 0.9894698764529106, "eval_recall": 0.9894895432085514, "eval_runtime": 18.7061, "eval_samples_per_second": 783.113, "eval_steps_per_second": 3.101, "step": 4736 }, { "epoch": 37.99, "eval_accuracy": 0.9901732152994063, "eval_f1": 0.9894400419774727, "eval_loss": 0.06961216777563095, "eval_precision": 0.9894223430643007, "eval_recall": 0.9894577415238572, "eval_runtime": 18.6705, "eval_samples_per_second": 784.607, "eval_steps_per_second": 3.107, "step": 4864 }, { "epoch": 38.99, "eval_accuracy": 0.9901962288467144, "eval_f1": 0.9894779103694458, "eval_loss": 0.06985215842723846, "eval_precision": 0.9894503782202383, "eval_recall": 0.9895054440508986, "eval_runtime": 18.2919, "eval_samples_per_second": 800.846, "eval_steps_per_second": 3.171, "step": 4992 }, { "epoch": 39.06, "learning_rate": 1.2987012987012988e-06, "loss": 0.0007, "step": 5000 }, { "epoch": 39.99, "eval_accuracy": 0.990138694978444, "eval_f1": 0.9894261920378432, "eval_loss": 0.06969785690307617, "eval_precision": 0.9894025940986839, "eval_recall": 0.9894497911026837, "eval_runtime": 18.675, "eval_samples_per_second": 784.419, "eval_steps_per_second": 3.106, "step": 5120 }, { "epoch": 39.99, "step": 5120, "total_flos": 2.72643266432467e+17, "train_loss": 0.07192220802244265, "train_runtime": 4057.7347, "train_samples_per_second": 1299.572, "train_steps_per_second": 1.262 } ], "max_steps": 5120, "num_train_epochs": 40, "total_flos": 2.72643266432467e+17, "trial_name": null, "trial_params": null }