|
{ |
|
"best_metric": 0.9695497407877142, |
|
"best_model_checkpoint": "models/pos_final_xlm_en/checkpoint-960", |
|
"epoch": 39.98765432098765, |
|
"global_step": 2400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.3035943640371897, |
|
"eval_f1": 0.2008264425810438, |
|
"eval_loss": 3.0061752796173096, |
|
"eval_precision": 0.24116944979086247, |
|
"eval_recall": 0.17204639246429285, |
|
"eval_runtime": 8.5419, |
|
"eval_samples_per_second": 808.95, |
|
"eval_steps_per_second": 3.161, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.8970254640723346, |
|
"eval_f1": 0.8625090892213438, |
|
"eval_loss": 0.5353450775146484, |
|
"eval_precision": 0.8698569221887629, |
|
"eval_recall": 0.8552843532822976, |
|
"eval_runtime": 8.8286, |
|
"eval_samples_per_second": 782.684, |
|
"eval_steps_per_second": 3.058, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.9690916642704239, |
|
"eval_f1": 0.9565594734295436, |
|
"eval_loss": 0.13116228580474854, |
|
"eval_precision": 0.9577732320280538, |
|
"eval_recall": 0.955348787260482, |
|
"eval_runtime": 8.7122, |
|
"eval_samples_per_second": 793.137, |
|
"eval_steps_per_second": 3.099, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_accuracy": 0.9736924502380268, |
|
"eval_f1": 0.9624591697465074, |
|
"eval_loss": 0.09810493141412735, |
|
"eval_precision": 0.9620755729286427, |
|
"eval_recall": 0.962843072580274, |
|
"eval_runtime": 8.7346, |
|
"eval_samples_per_second": 791.107, |
|
"eval_steps_per_second": 3.091, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.976024793124381, |
|
"eval_f1": 0.9655468564286207, |
|
"eval_loss": 0.08534899353981018, |
|
"eval_precision": 0.9652285898261429, |
|
"eval_recall": 0.9658653329855944, |
|
"eval_runtime": 9.6188, |
|
"eval_samples_per_second": 718.383, |
|
"eval_steps_per_second": 2.807, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_accuracy": 0.9768554905907537, |
|
"eval_f1": 0.9665754810234248, |
|
"eval_loss": 0.07884209603071213, |
|
"eval_precision": 0.9655618493570116, |
|
"eval_recall": 0.9675912431155362, |
|
"eval_runtime": 8.6507, |
|
"eval_samples_per_second": 798.781, |
|
"eval_steps_per_second": 3.121, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.9775456084858941, |
|
"eval_f1": 0.9676831206836455, |
|
"eval_loss": 0.0745365098118782, |
|
"eval_precision": 0.9664282162120806, |
|
"eval_recall": 0.9689412883727352, |
|
"eval_runtime": 8.6592, |
|
"eval_samples_per_second": 797.994, |
|
"eval_steps_per_second": 3.118, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"eval_accuracy": 0.9780376369852072, |
|
"eval_f1": 0.9681889956921002, |
|
"eval_loss": 0.07183900475502014, |
|
"eval_precision": 0.9675067024128686, |
|
"eval_recall": 0.9688722519675376, |
|
"eval_runtime": 8.714, |
|
"eval_samples_per_second": 792.979, |
|
"eval_steps_per_second": 3.098, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 4.99e-05, |
|
"loss": 0.7956, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.9779226173360172, |
|
"eval_f1": 0.9680838417498475, |
|
"eval_loss": 0.07068216055631638, |
|
"eval_precision": 0.9679168168329358, |
|
"eval_recall": 0.9682509243207584, |
|
"eval_runtime": 8.6148, |
|
"eval_samples_per_second": 802.109, |
|
"eval_steps_per_second": 3.134, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_accuracy": 0.9785999552701364, |
|
"eval_f1": 0.9690196980148693, |
|
"eval_loss": 0.06864651292562485, |
|
"eval_precision": 0.9682478900853156, |
|
"eval_recall": 0.9697927373701732, |
|
"eval_runtime": 9.3629, |
|
"eval_samples_per_second": 738.021, |
|
"eval_steps_per_second": 2.884, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_accuracy": 0.9786830250167737, |
|
"eval_f1": 0.9691608673146603, |
|
"eval_loss": 0.06855177879333496, |
|
"eval_precision": 0.9688896725672537, |
|
"eval_recall": 0.9694322139208076, |
|
"eval_runtime": 9.7452, |
|
"eval_samples_per_second": 709.065, |
|
"eval_steps_per_second": 2.771, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"eval_accuracy": 0.9787149749193265, |
|
"eval_f1": 0.9692986487728651, |
|
"eval_loss": 0.06800223141908646, |
|
"eval_precision": 0.9679492082918993, |
|
"eval_recall": 0.9706518570792998, |
|
"eval_runtime": 8.6538, |
|
"eval_samples_per_second": 798.493, |
|
"eval_steps_per_second": 3.12, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.97886833445158, |
|
"eval_f1": 0.9694478391289856, |
|
"eval_loss": 0.06851476430892944, |
|
"eval_precision": 0.968346050526928, |
|
"eval_recall": 0.9705521378273476, |
|
"eval_runtime": 8.6111, |
|
"eval_samples_per_second": 802.455, |
|
"eval_steps_per_second": 3.135, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_accuracy": 0.9788299945685166, |
|
"eval_f1": 0.9694149394930217, |
|
"eval_loss": 0.06948242336511612, |
|
"eval_precision": 0.9688690015554006, |
|
"eval_recall": 0.969961493027323, |
|
"eval_runtime": 8.5894, |
|
"eval_samples_per_second": 804.476, |
|
"eval_steps_per_second": 3.143, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_accuracy": 0.9786382951531998, |
|
"eval_f1": 0.9690144083384428, |
|
"eval_loss": 0.07028312981128693, |
|
"eval_precision": 0.9681685222904575, |
|
"eval_recall": 0.9698617737753709, |
|
"eval_runtime": 8.8189, |
|
"eval_samples_per_second": 783.547, |
|
"eval_steps_per_second": 3.062, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"eval_accuracy": 0.9790089140228122, |
|
"eval_f1": 0.9695497407877142, |
|
"eval_loss": 0.07188576459884644, |
|
"eval_precision": 0.9686181737446121, |
|
"eval_recall": 0.97048310142215, |
|
"eval_runtime": 8.5817, |
|
"eval_samples_per_second": 805.199, |
|
"eval_steps_per_second": 3.146, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 16.66, |
|
"learning_rate": 3.686842105263158e-05, |
|
"loss": 0.051, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.978823604588006, |
|
"eval_f1": 0.9694240468488908, |
|
"eval_loss": 0.07346100360155106, |
|
"eval_precision": 0.9687112241302716, |
|
"eval_recall": 0.9701379193961616, |
|
"eval_runtime": 9.6099, |
|
"eval_samples_per_second": 719.048, |
|
"eval_steps_per_second": 2.81, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"eval_accuracy": 0.9786830250167737, |
|
"eval_f1": 0.9692255223920633, |
|
"eval_loss": 0.07468883693218231, |
|
"eval_precision": 0.9683683392420959, |
|
"eval_recall": 0.9700842244143412, |
|
"eval_runtime": 8.7732, |
|
"eval_samples_per_second": 787.628, |
|
"eval_steps_per_second": 3.078, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_accuracy": 0.9785743953480942, |
|
"eval_f1": 0.9691213222329547, |
|
"eval_loss": 0.07609081268310547, |
|
"eval_precision": 0.9685273432113142, |
|
"eval_recall": 0.9697160302532869, |
|
"eval_runtime": 8.6582, |
|
"eval_samples_per_second": 798.088, |
|
"eval_steps_per_second": 3.118, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"eval_accuracy": 0.9784210358158407, |
|
"eval_f1": 0.9688278250741372, |
|
"eval_loss": 0.07741989195346832, |
|
"eval_precision": 0.9678266327811629, |
|
"eval_recall": 0.9698310909286163, |
|
"eval_runtime": 8.7584, |
|
"eval_samples_per_second": 788.959, |
|
"eval_steps_per_second": 3.083, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_accuracy": 0.9784977155819675, |
|
"eval_f1": 0.9689622916379138, |
|
"eval_loss": 0.0795513391494751, |
|
"eval_precision": 0.9685464216189702, |
|
"eval_recall": 0.9693785189389872, |
|
"eval_runtime": 8.6744, |
|
"eval_samples_per_second": 796.596, |
|
"eval_steps_per_second": 3.113, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 21.99, |
|
"eval_accuracy": 0.9785999552701364, |
|
"eval_f1": 0.9690767468323875, |
|
"eval_loss": 0.07958221435546875, |
|
"eval_precision": 0.9680789987369388, |
|
"eval_recall": 0.9700765537026526, |
|
"eval_runtime": 8.6133, |
|
"eval_samples_per_second": 802.246, |
|
"eval_steps_per_second": 3.135, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"eval_accuracy": 0.9783954758937985, |
|
"eval_f1": 0.9686965590754671, |
|
"eval_loss": 0.08197388052940369, |
|
"eval_precision": 0.968436627924806, |
|
"eval_recall": 0.9689566297961125, |
|
"eval_runtime": 8.7277, |
|
"eval_samples_per_second": 791.73, |
|
"eval_steps_per_second": 3.094, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 23.99, |
|
"eval_accuracy": 0.978127096712355, |
|
"eval_f1": 0.9683426871530653, |
|
"eval_loss": 0.08289676904678345, |
|
"eval_precision": 0.9678825963675377, |
|
"eval_recall": 0.9688032155623398, |
|
"eval_runtime": 8.6527, |
|
"eval_samples_per_second": 798.592, |
|
"eval_steps_per_second": 3.12, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"learning_rate": 2.3710526315789475e-05, |
|
"loss": 0.0318, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_accuracy": 0.9782101664589923, |
|
"eval_f1": 0.9685605958957412, |
|
"eval_loss": 0.08542540669441223, |
|
"eval_precision": 0.96811894087443, |
|
"eval_recall": 0.9690026540662443, |
|
"eval_runtime": 8.7516, |
|
"eval_samples_per_second": 789.565, |
|
"eval_steps_per_second": 3.085, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 25.99, |
|
"eval_accuracy": 0.9781526566343972, |
|
"eval_f1": 0.9684325094947744, |
|
"eval_loss": 0.08812534809112549, |
|
"eval_precision": 0.9676870878552774, |
|
"eval_recall": 0.9691790804350827, |
|
"eval_runtime": 9.0034, |
|
"eval_samples_per_second": 767.488, |
|
"eval_steps_per_second": 2.999, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"eval_accuracy": 0.9782548963225662, |
|
"eval_f1": 0.9684803649117427, |
|
"eval_loss": 0.08933103829622269, |
|
"eval_precision": 0.9679127176886124, |
|
"eval_recall": 0.9690486783363761, |
|
"eval_runtime": 8.7744, |
|
"eval_samples_per_second": 787.518, |
|
"eval_steps_per_second": 3.077, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 27.99, |
|
"eval_accuracy": 0.9780951468098023, |
|
"eval_f1": 0.9683360927152317, |
|
"eval_loss": 0.090970478951931, |
|
"eval_precision": 0.9676092584366048, |
|
"eval_recall": 0.9690640197597533, |
|
"eval_runtime": 8.5971, |
|
"eval_samples_per_second": 803.763, |
|
"eval_steps_per_second": 3.141, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_accuracy": 0.9782612863030767, |
|
"eval_f1": 0.9684535086171853, |
|
"eval_loss": 0.09189366549253464, |
|
"eval_precision": 0.9683569544143813, |
|
"eval_recall": 0.9685500820766151, |
|
"eval_runtime": 8.7669, |
|
"eval_samples_per_second": 788.195, |
|
"eval_steps_per_second": 3.08, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"eval_accuracy": 0.9780759768682705, |
|
"eval_f1": 0.9681861749031936, |
|
"eval_loss": 0.09329535067081451, |
|
"eval_precision": 0.9678225410841305, |
|
"eval_recall": 0.9685500820766151, |
|
"eval_runtime": 8.9984, |
|
"eval_samples_per_second": 767.912, |
|
"eval_steps_per_second": 3.001, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 30.99, |
|
"eval_accuracy": 0.9780887568292916, |
|
"eval_f1": 0.968276115855809, |
|
"eval_loss": 0.0947079062461853, |
|
"eval_precision": 0.967741935483871, |
|
"eval_recall": 0.9688108862740286, |
|
"eval_runtime": 9.134, |
|
"eval_samples_per_second": 756.516, |
|
"eval_steps_per_second": 2.956, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 31.99, |
|
"eval_accuracy": 0.9782804562446085, |
|
"eval_f1": 0.9685832975657613, |
|
"eval_loss": 0.09658045321702957, |
|
"eval_precision": 0.9677970255326318, |
|
"eval_recall": 0.9693708482272986, |
|
"eval_runtime": 8.9466, |
|
"eval_samples_per_second": 772.361, |
|
"eval_steps_per_second": 3.018, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 32.99, |
|
"eval_accuracy": 0.9780951468098023, |
|
"eval_f1": 0.9683005734261446, |
|
"eval_loss": 0.09742453694343567, |
|
"eval_precision": 0.967721916611759, |
|
"eval_recall": 0.9688799226792262, |
|
"eval_runtime": 9.0098, |
|
"eval_samples_per_second": 766.942, |
|
"eval_steps_per_second": 2.997, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 1.055263157894737e-05, |
|
"loss": 0.0211, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 33.99, |
|
"eval_accuracy": 0.9784274257963513, |
|
"eval_f1": 0.9688261902936441, |
|
"eval_loss": 0.09810397773981094, |
|
"eval_precision": 0.968351035296642, |
|
"eval_recall": 0.9693018118221008, |
|
"eval_runtime": 8.8808, |
|
"eval_samples_per_second": 778.087, |
|
"eval_steps_per_second": 3.04, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 34.99, |
|
"eval_accuracy": 0.978286846225119, |
|
"eval_f1": 0.9685722171959579, |
|
"eval_loss": 0.09894430637359619, |
|
"eval_precision": 0.9681268488573487, |
|
"eval_recall": 0.9690179954896215, |
|
"eval_runtime": 9.0496, |
|
"eval_samples_per_second": 763.573, |
|
"eval_steps_per_second": 2.984, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 35.99, |
|
"eval_accuracy": 0.9783507460302246, |
|
"eval_f1": 0.9687094017421564, |
|
"eval_loss": 0.10078005492687225, |
|
"eval_precision": 0.9679341374688876, |
|
"eval_recall": 0.969485908902628, |
|
"eval_runtime": 8.9188, |
|
"eval_samples_per_second": 774.772, |
|
"eval_steps_per_second": 3.027, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 36.99, |
|
"eval_accuracy": 0.9782229464200134, |
|
"eval_f1": 0.9684928880880267, |
|
"eval_loss": 0.10152223706245422, |
|
"eval_precision": 0.9681291390728477, |
|
"eval_recall": 0.9688569105441602, |
|
"eval_runtime": 8.9289, |
|
"eval_samples_per_second": 773.895, |
|
"eval_steps_per_second": 3.024, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 37.99, |
|
"eval_accuracy": 0.97806958688776, |
|
"eval_f1": 0.9682817728476643, |
|
"eval_loss": 0.10151796787977219, |
|
"eval_precision": 0.9676920130243248, |
|
"eval_recall": 0.9688722519675376, |
|
"eval_runtime": 9.2785, |
|
"eval_samples_per_second": 744.734, |
|
"eval_steps_per_second": 2.91, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 38.99, |
|
"eval_accuracy": 0.9781654365954184, |
|
"eval_f1": 0.9684071725914399, |
|
"eval_loss": 0.10238787531852722, |
|
"eval_precision": 0.9678506849734898, |
|
"eval_recall": 0.9689643005078011, |
|
"eval_runtime": 8.88, |
|
"eval_samples_per_second": 778.153, |
|
"eval_steps_per_second": 3.041, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"eval_accuracy": 0.9782165564395029, |
|
"eval_f1": 0.9685044199615122, |
|
"eval_loss": 0.10218308120965958, |
|
"eval_precision": 0.9680220083374204, |
|
"eval_recall": 0.968987312642867, |
|
"eval_runtime": 9.7749, |
|
"eval_samples_per_second": 706.909, |
|
"eval_steps_per_second": 2.762, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"step": 2400, |
|
"total_flos": 1.3719917000335334e+17, |
|
"train_loss": 0.19011780440807344, |
|
"train_runtime": 1964.637, |
|
"train_samples_per_second": 1266.168, |
|
"train_steps_per_second": 1.222 |
|
} |
|
], |
|
"max_steps": 2400, |
|
"num_train_epochs": 40, |
|
"total_flos": 1.3719917000335334e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|