{ "best_metric": 0.9695497407877142, "best_model_checkpoint": "models/pos_final_xlm_en/checkpoint-960", "epoch": 39.98765432098765, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "eval_accuracy": 0.3035943640371897, "eval_f1": 0.2008264425810438, "eval_loss": 3.0061752796173096, "eval_precision": 0.24116944979086247, "eval_recall": 0.17204639246429285, "eval_runtime": 8.5419, "eval_samples_per_second": 808.95, "eval_steps_per_second": 3.161, "step": 60 }, { "epoch": 1.99, "eval_accuracy": 0.8970254640723346, "eval_f1": 0.8625090892213438, "eval_loss": 0.5353450775146484, "eval_precision": 0.8698569221887629, "eval_recall": 0.8552843532822976, "eval_runtime": 8.8286, "eval_samples_per_second": 782.684, "eval_steps_per_second": 3.058, "step": 120 }, { "epoch": 2.99, "eval_accuracy": 0.9690916642704239, "eval_f1": 0.9565594734295436, "eval_loss": 0.13116228580474854, "eval_precision": 0.9577732320280538, "eval_recall": 0.955348787260482, "eval_runtime": 8.7122, "eval_samples_per_second": 793.137, "eval_steps_per_second": 3.099, "step": 180 }, { "epoch": 3.99, "eval_accuracy": 0.9736924502380268, "eval_f1": 0.9624591697465074, "eval_loss": 0.09810493141412735, "eval_precision": 0.9620755729286427, "eval_recall": 0.962843072580274, "eval_runtime": 8.7346, "eval_samples_per_second": 791.107, "eval_steps_per_second": 3.091, "step": 240 }, { "epoch": 4.99, "eval_accuracy": 0.976024793124381, "eval_f1": 0.9655468564286207, "eval_loss": 0.08534899353981018, "eval_precision": 0.9652285898261429, "eval_recall": 0.9658653329855944, "eval_runtime": 9.6188, "eval_samples_per_second": 718.383, "eval_steps_per_second": 2.807, "step": 300 }, { "epoch": 5.99, "eval_accuracy": 0.9768554905907537, "eval_f1": 0.9665754810234248, "eval_loss": 0.07884209603071213, "eval_precision": 0.9655618493570116, "eval_recall": 0.9675912431155362, "eval_runtime": 8.6507, "eval_samples_per_second": 798.781, "eval_steps_per_second": 3.121, "step": 360 }, { "epoch": 6.99, "eval_accuracy": 0.9775456084858941, "eval_f1": 0.9676831206836455, "eval_loss": 0.0745365098118782, "eval_precision": 0.9664282162120806, "eval_recall": 0.9689412883727352, "eval_runtime": 8.6592, "eval_samples_per_second": 797.994, "eval_steps_per_second": 3.118, "step": 420 }, { "epoch": 7.99, "eval_accuracy": 0.9780376369852072, "eval_f1": 0.9681889956921002, "eval_loss": 0.07183900475502014, "eval_precision": 0.9675067024128686, "eval_recall": 0.9688722519675376, "eval_runtime": 8.714, "eval_samples_per_second": 792.979, "eval_steps_per_second": 3.098, "step": 480 }, { "epoch": 8.33, "learning_rate": 4.99e-05, "loss": 0.7956, "step": 500 }, { "epoch": 8.99, "eval_accuracy": 0.9779226173360172, "eval_f1": 0.9680838417498475, "eval_loss": 0.07068216055631638, "eval_precision": 0.9679168168329358, "eval_recall": 0.9682509243207584, "eval_runtime": 8.6148, "eval_samples_per_second": 802.109, "eval_steps_per_second": 3.134, "step": 540 }, { "epoch": 9.99, "eval_accuracy": 0.9785999552701364, "eval_f1": 0.9690196980148693, "eval_loss": 0.06864651292562485, "eval_precision": 0.9682478900853156, "eval_recall": 0.9697927373701732, "eval_runtime": 9.3629, "eval_samples_per_second": 738.021, "eval_steps_per_second": 2.884, "step": 600 }, { "epoch": 10.99, "eval_accuracy": 0.9786830250167737, "eval_f1": 0.9691608673146603, "eval_loss": 0.06855177879333496, "eval_precision": 0.9688896725672537, "eval_recall": 0.9694322139208076, "eval_runtime": 9.7452, "eval_samples_per_second": 709.065, "eval_steps_per_second": 2.771, "step": 660 }, { "epoch": 11.99, "eval_accuracy": 0.9787149749193265, "eval_f1": 0.9692986487728651, "eval_loss": 0.06800223141908646, "eval_precision": 0.9679492082918993, "eval_recall": 0.9706518570792998, "eval_runtime": 8.6538, "eval_samples_per_second": 798.493, "eval_steps_per_second": 3.12, "step": 720 }, { "epoch": 12.99, "eval_accuracy": 0.97886833445158, "eval_f1": 0.9694478391289856, "eval_loss": 0.06851476430892944, "eval_precision": 0.968346050526928, "eval_recall": 0.9705521378273476, "eval_runtime": 8.6111, "eval_samples_per_second": 802.455, "eval_steps_per_second": 3.135, "step": 780 }, { "epoch": 13.99, "eval_accuracy": 0.9788299945685166, "eval_f1": 0.9694149394930217, "eval_loss": 0.06948242336511612, "eval_precision": 0.9688690015554006, "eval_recall": 0.969961493027323, "eval_runtime": 8.5894, "eval_samples_per_second": 804.476, "eval_steps_per_second": 3.143, "step": 840 }, { "epoch": 14.99, "eval_accuracy": 0.9786382951531998, "eval_f1": 0.9690144083384428, "eval_loss": 0.07028312981128693, "eval_precision": 0.9681685222904575, "eval_recall": 0.9698617737753709, "eval_runtime": 8.8189, "eval_samples_per_second": 783.547, "eval_steps_per_second": 3.062, "step": 900 }, { "epoch": 15.99, "eval_accuracy": 0.9790089140228122, "eval_f1": 0.9695497407877142, "eval_loss": 0.07188576459884644, "eval_precision": 0.9686181737446121, "eval_recall": 0.97048310142215, "eval_runtime": 8.5817, "eval_samples_per_second": 805.199, "eval_steps_per_second": 3.146, "step": 960 }, { "epoch": 16.66, "learning_rate": 3.686842105263158e-05, "loss": 0.051, "step": 1000 }, { "epoch": 16.99, "eval_accuracy": 0.978823604588006, "eval_f1": 0.9694240468488908, "eval_loss": 0.07346100360155106, "eval_precision": 0.9687112241302716, "eval_recall": 0.9701379193961616, "eval_runtime": 9.6099, "eval_samples_per_second": 719.048, "eval_steps_per_second": 2.81, "step": 1020 }, { "epoch": 17.99, "eval_accuracy": 0.9786830250167737, "eval_f1": 0.9692255223920633, "eval_loss": 0.07468883693218231, "eval_precision": 0.9683683392420959, "eval_recall": 0.9700842244143412, "eval_runtime": 8.7732, "eval_samples_per_second": 787.628, "eval_steps_per_second": 3.078, "step": 1080 }, { "epoch": 18.99, "eval_accuracy": 0.9785743953480942, "eval_f1": 0.9691213222329547, "eval_loss": 0.07609081268310547, "eval_precision": 0.9685273432113142, "eval_recall": 0.9697160302532869, "eval_runtime": 8.6582, "eval_samples_per_second": 798.088, "eval_steps_per_second": 3.118, "step": 1140 }, { "epoch": 19.99, "eval_accuracy": 0.9784210358158407, "eval_f1": 0.9688278250741372, "eval_loss": 0.07741989195346832, "eval_precision": 0.9678266327811629, "eval_recall": 0.9698310909286163, "eval_runtime": 8.7584, "eval_samples_per_second": 788.959, "eval_steps_per_second": 3.083, "step": 1200 }, { "epoch": 20.99, "eval_accuracy": 0.9784977155819675, "eval_f1": 0.9689622916379138, "eval_loss": 0.0795513391494751, "eval_precision": 0.9685464216189702, "eval_recall": 0.9693785189389872, "eval_runtime": 8.6744, "eval_samples_per_second": 796.596, "eval_steps_per_second": 3.113, "step": 1260 }, { "epoch": 21.99, "eval_accuracy": 0.9785999552701364, "eval_f1": 0.9690767468323875, "eval_loss": 0.07958221435546875, "eval_precision": 0.9680789987369388, "eval_recall": 0.9700765537026526, "eval_runtime": 8.6133, "eval_samples_per_second": 802.246, "eval_steps_per_second": 3.135, "step": 1320 }, { "epoch": 22.99, "eval_accuracy": 0.9783954758937985, "eval_f1": 0.9686965590754671, "eval_loss": 0.08197388052940369, "eval_precision": 0.968436627924806, "eval_recall": 0.9689566297961125, "eval_runtime": 8.7277, "eval_samples_per_second": 791.73, "eval_steps_per_second": 3.094, "step": 1380 }, { "epoch": 23.99, "eval_accuracy": 0.978127096712355, "eval_f1": 0.9683426871530653, "eval_loss": 0.08289676904678345, "eval_precision": 0.9678825963675377, "eval_recall": 0.9688032155623398, "eval_runtime": 8.6527, "eval_samples_per_second": 798.592, "eval_steps_per_second": 3.12, "step": 1440 }, { "epoch": 24.99, "learning_rate": 2.3710526315789475e-05, "loss": 0.0318, "step": 1500 }, { "epoch": 24.99, "eval_accuracy": 0.9782101664589923, "eval_f1": 0.9685605958957412, "eval_loss": 0.08542540669441223, "eval_precision": 0.96811894087443, "eval_recall": 0.9690026540662443, "eval_runtime": 8.7516, "eval_samples_per_second": 789.565, "eval_steps_per_second": 3.085, "step": 1500 }, { "epoch": 25.99, "eval_accuracy": 0.9781526566343972, "eval_f1": 0.9684325094947744, "eval_loss": 0.08812534809112549, "eval_precision": 0.9676870878552774, "eval_recall": 0.9691790804350827, "eval_runtime": 9.0034, "eval_samples_per_second": 767.488, "eval_steps_per_second": 2.999, "step": 1560 }, { "epoch": 26.99, "eval_accuracy": 0.9782548963225662, "eval_f1": 0.9684803649117427, "eval_loss": 0.08933103829622269, "eval_precision": 0.9679127176886124, "eval_recall": 0.9690486783363761, "eval_runtime": 8.7744, "eval_samples_per_second": 787.518, "eval_steps_per_second": 3.077, "step": 1620 }, { "epoch": 27.99, "eval_accuracy": 0.9780951468098023, "eval_f1": 0.9683360927152317, "eval_loss": 0.090970478951931, "eval_precision": 0.9676092584366048, "eval_recall": 0.9690640197597533, "eval_runtime": 8.5971, "eval_samples_per_second": 803.763, "eval_steps_per_second": 3.141, "step": 1680 }, { "epoch": 28.99, "eval_accuracy": 0.9782612863030767, "eval_f1": 0.9684535086171853, "eval_loss": 0.09189366549253464, "eval_precision": 0.9683569544143813, "eval_recall": 0.9685500820766151, "eval_runtime": 8.7669, "eval_samples_per_second": 788.195, "eval_steps_per_second": 3.08, "step": 1740 }, { "epoch": 29.99, "eval_accuracy": 0.9780759768682705, "eval_f1": 0.9681861749031936, "eval_loss": 0.09329535067081451, "eval_precision": 0.9678225410841305, "eval_recall": 0.9685500820766151, "eval_runtime": 8.9984, "eval_samples_per_second": 767.912, "eval_steps_per_second": 3.001, "step": 1800 }, { "epoch": 30.99, "eval_accuracy": 0.9780887568292916, "eval_f1": 0.968276115855809, "eval_loss": 0.0947079062461853, "eval_precision": 0.967741935483871, "eval_recall": 0.9688108862740286, "eval_runtime": 9.134, "eval_samples_per_second": 756.516, "eval_steps_per_second": 2.956, "step": 1860 }, { "epoch": 31.99, "eval_accuracy": 0.9782804562446085, "eval_f1": 0.9685832975657613, "eval_loss": 0.09658045321702957, "eval_precision": 0.9677970255326318, "eval_recall": 0.9693708482272986, "eval_runtime": 8.9466, "eval_samples_per_second": 772.361, "eval_steps_per_second": 3.018, "step": 1920 }, { "epoch": 32.99, "eval_accuracy": 0.9780951468098023, "eval_f1": 0.9683005734261446, "eval_loss": 0.09742453694343567, "eval_precision": 0.967721916611759, "eval_recall": 0.9688799226792262, "eval_runtime": 9.0098, "eval_samples_per_second": 766.942, "eval_steps_per_second": 2.997, "step": 1980 }, { "epoch": 33.33, "learning_rate": 1.055263157894737e-05, "loss": 0.0211, "step": 2000 }, { "epoch": 33.99, "eval_accuracy": 0.9784274257963513, "eval_f1": 0.9688261902936441, "eval_loss": 0.09810397773981094, "eval_precision": 0.968351035296642, "eval_recall": 0.9693018118221008, "eval_runtime": 8.8808, "eval_samples_per_second": 778.087, "eval_steps_per_second": 3.04, "step": 2040 }, { "epoch": 34.99, "eval_accuracy": 0.978286846225119, "eval_f1": 0.9685722171959579, "eval_loss": 0.09894430637359619, "eval_precision": 0.9681268488573487, "eval_recall": 0.9690179954896215, "eval_runtime": 9.0496, "eval_samples_per_second": 763.573, "eval_steps_per_second": 2.984, "step": 2100 }, { "epoch": 35.99, "eval_accuracy": 0.9783507460302246, "eval_f1": 0.9687094017421564, "eval_loss": 0.10078005492687225, "eval_precision": 0.9679341374688876, "eval_recall": 0.969485908902628, "eval_runtime": 8.9188, "eval_samples_per_second": 774.772, "eval_steps_per_second": 3.027, "step": 2160 }, { "epoch": 36.99, "eval_accuracy": 0.9782229464200134, "eval_f1": 0.9684928880880267, "eval_loss": 0.10152223706245422, "eval_precision": 0.9681291390728477, "eval_recall": 0.9688569105441602, "eval_runtime": 8.9289, "eval_samples_per_second": 773.895, "eval_steps_per_second": 3.024, "step": 2220 }, { "epoch": 37.99, "eval_accuracy": 0.97806958688776, "eval_f1": 0.9682817728476643, "eval_loss": 0.10151796787977219, "eval_precision": 0.9676920130243248, "eval_recall": 0.9688722519675376, "eval_runtime": 9.2785, "eval_samples_per_second": 744.734, "eval_steps_per_second": 2.91, "step": 2280 }, { "epoch": 38.99, "eval_accuracy": 0.9781654365954184, "eval_f1": 0.9684071725914399, "eval_loss": 0.10238787531852722, "eval_precision": 0.9678506849734898, "eval_recall": 0.9689643005078011, "eval_runtime": 8.88, "eval_samples_per_second": 778.153, "eval_steps_per_second": 3.041, "step": 2340 }, { "epoch": 39.99, "eval_accuracy": 0.9782165564395029, "eval_f1": 0.9685044199615122, "eval_loss": 0.10218308120965958, "eval_precision": 0.9680220083374204, "eval_recall": 0.968987312642867, "eval_runtime": 9.7749, "eval_samples_per_second": 706.909, "eval_steps_per_second": 2.762, "step": 2400 }, { "epoch": 39.99, "step": 2400, "total_flos": 1.3719917000335334e+17, "train_loss": 0.19011780440807344, "train_runtime": 1964.637, "train_samples_per_second": 1266.168, "train_steps_per_second": 1.222 } ], "max_steps": 2400, "num_train_epochs": 40, "total_flos": 1.3719917000335334e+17, "trial_name": null, "trial_params": null }