|
{ |
|
"best_metric": 0.9704877076819325, |
|
"best_model_checkpoint": "models/pos_final_mono_en/checkpoint-900", |
|
"epoch": 39.98765432098765, |
|
"global_step": 2400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.2832997859356529, |
|
"eval_f1": 0.15215239835327324, |
|
"eval_loss": 2.7932815551757812, |
|
"eval_precision": 0.3215832858875659, |
|
"eval_recall": 0.09965021554699845, |
|
"eval_runtime": 8.3095, |
|
"eval_samples_per_second": 831.582, |
|
"eval_steps_per_second": 3.249, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.92244480654334, |
|
"eval_f1": 0.9032193422583952, |
|
"eval_loss": 0.3818030059337616, |
|
"eval_precision": 0.9075490985688619, |
|
"eval_recall": 0.8989307027906049, |
|
"eval_runtime": 8.786, |
|
"eval_samples_per_second": 786.48, |
|
"eval_steps_per_second": 3.073, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.9720821751493658, |
|
"eval_f1": 0.9604647150169666, |
|
"eval_loss": 0.11557099223136902, |
|
"eval_precision": 0.9601959536641649, |
|
"eval_recall": 0.9607336268659006, |
|
"eval_runtime": 8.8137, |
|
"eval_samples_per_second": 784.005, |
|
"eval_steps_per_second": 3.063, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_accuracy": 0.974849036710438, |
|
"eval_f1": 0.9641873067091794, |
|
"eval_loss": 0.09111332893371582, |
|
"eval_precision": 0.9634156614972238, |
|
"eval_recall": 0.964960189006336, |
|
"eval_runtime": 8.5212, |
|
"eval_samples_per_second": 810.917, |
|
"eval_steps_per_second": 3.169, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.9772261094603661, |
|
"eval_f1": 0.9671334513708446, |
|
"eval_loss": 0.07944779098033905, |
|
"eval_precision": 0.9663853317811408, |
|
"eval_recall": 0.9678827301597042, |
|
"eval_runtime": 8.3156, |
|
"eval_samples_per_second": 830.964, |
|
"eval_steps_per_second": 3.247, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_accuracy": 0.9781015367903128, |
|
"eval_f1": 0.9683213898602403, |
|
"eval_loss": 0.07408788055181503, |
|
"eval_precision": 0.9669917766303309, |
|
"eval_recall": 0.9696546645597779, |
|
"eval_runtime": 8.4486, |
|
"eval_samples_per_second": 817.888, |
|
"eval_steps_per_second": 3.196, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.9787405348413687, |
|
"eval_f1": 0.9692896581055761, |
|
"eval_loss": 0.06950810551643372, |
|
"eval_precision": 0.9683435282228738, |
|
"eval_recall": 0.9702376386481137, |
|
"eval_runtime": 8.5813, |
|
"eval_samples_per_second": 805.242, |
|
"eval_steps_per_second": 3.146, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"eval_accuracy": 0.9788555544905588, |
|
"eval_f1": 0.9692969848880303, |
|
"eval_loss": 0.06879482418298721, |
|
"eval_precision": 0.9686027896716276, |
|
"eval_recall": 0.9699921758740776, |
|
"eval_runtime": 8.4632, |
|
"eval_samples_per_second": 816.474, |
|
"eval_steps_per_second": 3.19, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 5e-05, |
|
"loss": 0.7281, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.9788938943736222, |
|
"eval_f1": 0.9695171959192747, |
|
"eval_loss": 0.06750848144292831, |
|
"eval_precision": 0.9687672323999755, |
|
"eval_recall": 0.9702683214948683, |
|
"eval_runtime": 8.3585, |
|
"eval_samples_per_second": 826.702, |
|
"eval_steps_per_second": 3.23, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_accuracy": 0.9791367136330235, |
|
"eval_f1": 0.9696259085980099, |
|
"eval_loss": 0.06700527667999268, |
|
"eval_precision": 0.9687090871505899, |
|
"eval_recall": 0.970544467115659, |
|
"eval_runtime": 8.4143, |
|
"eval_samples_per_second": 821.219, |
|
"eval_steps_per_second": 3.209, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_accuracy": 0.9792261733601713, |
|
"eval_f1": 0.9698826362182866, |
|
"eval_loss": 0.06581725925207138, |
|
"eval_precision": 0.9695815158646807, |
|
"eval_recall": 0.9701839436662933, |
|
"eval_runtime": 8.2031, |
|
"eval_samples_per_second": 842.369, |
|
"eval_steps_per_second": 3.291, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"eval_accuracy": 0.9792900731652768, |
|
"eval_f1": 0.9699631623689432, |
|
"eval_loss": 0.06702851504087448, |
|
"eval_precision": 0.968427894173421, |
|
"eval_recall": 0.9715033060767378, |
|
"eval_runtime": 8.414, |
|
"eval_samples_per_second": 821.254, |
|
"eval_steps_per_second": 3.209, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.9792261733601713, |
|
"eval_f1": 0.9699927596470999, |
|
"eval_loss": 0.0671982690691948, |
|
"eval_precision": 0.9688607265575376, |
|
"eval_recall": 0.9711274412039949, |
|
"eval_runtime": 9.0147, |
|
"eval_samples_per_second": 766.523, |
|
"eval_steps_per_second": 2.995, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_accuracy": 0.9795584523467203, |
|
"eval_f1": 0.9702842773467448, |
|
"eval_loss": 0.06784472614526749, |
|
"eval_precision": 0.969763842275451, |
|
"eval_recall": 0.9708052713130725, |
|
"eval_runtime": 8.2137, |
|
"eval_samples_per_second": 841.282, |
|
"eval_steps_per_second": 3.287, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_accuracy": 0.9796479120738681, |
|
"eval_f1": 0.9704877076819325, |
|
"eval_loss": 0.06808918714523315, |
|
"eval_precision": 0.969581195926805, |
|
"eval_recall": 0.971395916113097, |
|
"eval_runtime": 8.1766, |
|
"eval_samples_per_second": 845.097, |
|
"eval_steps_per_second": 3.302, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"eval_accuracy": 0.9794817725805937, |
|
"eval_f1": 0.9703219971333746, |
|
"eval_loss": 0.07063417881727219, |
|
"eval_precision": 0.9695714110654985, |
|
"eval_recall": 0.9710737462221745, |
|
"eval_runtime": 8.3057, |
|
"eval_samples_per_second": 831.963, |
|
"eval_steps_per_second": 3.251, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 16.66, |
|
"learning_rate": 3.6842105263157895e-05, |
|
"loss": 0.0484, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.9793284130483402, |
|
"eval_f1": 0.9699285875827489, |
|
"eval_loss": 0.07248909771442413, |
|
"eval_precision": 0.9693823603778934, |
|
"eval_recall": 0.9704754307104613, |
|
"eval_runtime": 8.1639, |
|
"eval_samples_per_second": 846.409, |
|
"eval_steps_per_second": 3.307, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"eval_accuracy": 0.9790855937889389, |
|
"eval_f1": 0.9696844283497156, |
|
"eval_loss": 0.0734858438372612, |
|
"eval_precision": 0.9688564886782195, |
|
"eval_recall": 0.9705137842689044, |
|
"eval_runtime": 8.302, |
|
"eval_samples_per_second": 832.333, |
|
"eval_steps_per_second": 3.252, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_accuracy": 0.9791878334771079, |
|
"eval_f1": 0.9697733866300795, |
|
"eval_loss": 0.0745043233036995, |
|
"eval_precision": 0.969041765278065, |
|
"eval_recall": 0.9705061135572158, |
|
"eval_runtime": 8.2203, |
|
"eval_samples_per_second": 840.607, |
|
"eval_steps_per_second": 3.285, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"eval_accuracy": 0.9791111537109812, |
|
"eval_f1": 0.9697643226671777, |
|
"eval_loss": 0.07685930281877518, |
|
"eval_precision": 0.9689548489860933, |
|
"eval_recall": 0.9705751499624136, |
|
"eval_runtime": 8.1933, |
|
"eval_samples_per_second": 843.373, |
|
"eval_steps_per_second": 3.295, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_accuracy": 0.9790600338668967, |
|
"eval_f1": 0.9696818704484477, |
|
"eval_loss": 0.0796540305018425, |
|
"eval_precision": 0.969057869980235, |
|
"eval_recall": 0.9703066750533115, |
|
"eval_runtime": 8.1909, |
|
"eval_samples_per_second": 843.621, |
|
"eval_steps_per_second": 3.296, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 21.99, |
|
"eval_accuracy": 0.9790983737499601, |
|
"eval_f1": 0.9697178726633098, |
|
"eval_loss": 0.08079346269369125, |
|
"eval_precision": 0.9689232654311534, |
|
"eval_recall": 0.9705137842689044, |
|
"eval_runtime": 8.2659, |
|
"eval_samples_per_second": 835.967, |
|
"eval_steps_per_second": 3.266, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"eval_accuracy": 0.9791047637304706, |
|
"eval_f1": 0.9696795461514873, |
|
"eval_loss": 0.08375120162963867, |
|
"eval_precision": 0.9691297485327245, |
|
"eval_recall": 0.9702299679364251, |
|
"eval_runtime": 8.1982, |
|
"eval_samples_per_second": 842.873, |
|
"eval_steps_per_second": 3.293, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 23.99, |
|
"eval_accuracy": 0.9789322342566855, |
|
"eval_f1": 0.9694696267232725, |
|
"eval_loss": 0.08609236031770706, |
|
"eval_precision": 0.9685270249578931, |
|
"eval_recall": 0.9704140650169523, |
|
"eval_runtime": 8.2431, |
|
"eval_samples_per_second": 838.274, |
|
"eval_steps_per_second": 3.275, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"learning_rate": 2.368421052631579e-05, |
|
"loss": 0.0289, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_accuracy": 0.978663855075242, |
|
"eval_f1": 0.9691399662731872, |
|
"eval_loss": 0.08786529302597046, |
|
"eval_precision": 0.9684421771833878, |
|
"eval_recall": 0.969838761640305, |
|
"eval_runtime": 8.3812, |
|
"eval_samples_per_second": 824.467, |
|
"eval_steps_per_second": 3.222, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 25.99, |
|
"eval_accuracy": 0.9788747244320904, |
|
"eval_f1": 0.9693623412750109, |
|
"eval_loss": 0.08869566768407822, |
|
"eval_precision": 0.968419843821773, |
|
"eval_recall": 0.9703066750533115, |
|
"eval_runtime": 8.2152, |
|
"eval_samples_per_second": 841.126, |
|
"eval_steps_per_second": 3.287, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"eval_accuracy": 0.9787341448608582, |
|
"eval_f1": 0.9691018771893516, |
|
"eval_loss": 0.09096662700176239, |
|
"eval_precision": 0.9683967033303716, |
|
"eval_recall": 0.9698080787935505, |
|
"eval_runtime": 8.2409, |
|
"eval_samples_per_second": 838.5, |
|
"eval_steps_per_second": 3.276, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 27.99, |
|
"eval_accuracy": 0.9786702450557526, |
|
"eval_f1": 0.9690532771176695, |
|
"eval_loss": 0.09239726513624191, |
|
"eval_precision": 0.9684296811558675, |
|
"eval_recall": 0.9696776766948437, |
|
"eval_runtime": 8.2167, |
|
"eval_samples_per_second": 840.968, |
|
"eval_steps_per_second": 3.286, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_accuracy": 0.9788299945685166, |
|
"eval_f1": 0.9692778570442306, |
|
"eval_loss": 0.09497389197349548, |
|
"eval_precision": 0.9693075990733212, |
|
"eval_recall": 0.9692481168402804, |
|
"eval_runtime": 8.3979, |
|
"eval_samples_per_second": 822.821, |
|
"eval_steps_per_second": 3.215, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"eval_accuracy": 0.9788811144126011, |
|
"eval_f1": 0.9694443698883832, |
|
"eval_loss": 0.09615545719861984, |
|
"eval_precision": 0.9691805239310932, |
|
"eval_recall": 0.9697083595415983, |
|
"eval_runtime": 8.3107, |
|
"eval_samples_per_second": 831.454, |
|
"eval_steps_per_second": 3.249, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 30.99, |
|
"eval_accuracy": 0.9787213648998371, |
|
"eval_f1": 0.9692713982912798, |
|
"eval_loss": 0.09773550182580948, |
|
"eval_precision": 0.9686587860355012, |
|
"eval_recall": 0.9698847859104368, |
|
"eval_runtime": 8.3495, |
|
"eval_samples_per_second": 827.598, |
|
"eval_steps_per_second": 3.234, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 31.99, |
|
"eval_accuracy": 0.9788491645100482, |
|
"eval_f1": 0.9693524335969396, |
|
"eval_loss": 0.09792140126228333, |
|
"eval_precision": 0.9688436281158288, |
|
"eval_recall": 0.9698617737753709, |
|
"eval_runtime": 8.217, |
|
"eval_samples_per_second": 840.944, |
|
"eval_steps_per_second": 3.286, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 32.99, |
|
"eval_accuracy": 0.9787916546854533, |
|
"eval_f1": 0.9692388483638134, |
|
"eval_loss": 0.09997569024562836, |
|
"eval_precision": 0.9686855916944412, |
|
"eval_recall": 0.9697927373701732, |
|
"eval_runtime": 9.2731, |
|
"eval_samples_per_second": 745.164, |
|
"eval_steps_per_second": 2.912, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 1.0526315789473684e-05, |
|
"loss": 0.018, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 33.99, |
|
"eval_accuracy": 0.9788427745295377, |
|
"eval_f1": 0.9692880908937579, |
|
"eval_loss": 0.10211524367332458, |
|
"eval_precision": 0.9687533522335453, |
|
"eval_recall": 0.9698234202169277, |
|
"eval_runtime": 8.1926, |
|
"eval_samples_per_second": 843.447, |
|
"eval_steps_per_second": 3.296, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 34.99, |
|
"eval_accuracy": 0.9788427745295377, |
|
"eval_f1": 0.9694191594963878, |
|
"eval_loss": 0.10369361937046051, |
|
"eval_precision": 0.968739706929965, |
|
"eval_recall": 0.9700995658377184, |
|
"eval_runtime": 8.1647, |
|
"eval_samples_per_second": 846.325, |
|
"eval_steps_per_second": 3.307, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 35.99, |
|
"eval_accuracy": 0.9789514041982172, |
|
"eval_f1": 0.9695659672319632, |
|
"eval_loss": 0.10349933803081512, |
|
"eval_precision": 0.9688493324856962, |
|
"eval_recall": 0.9702836629182455, |
|
"eval_runtime": 8.3265, |
|
"eval_samples_per_second": 829.884, |
|
"eval_steps_per_second": 3.243, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 36.99, |
|
"eval_accuracy": 0.9788875043931116, |
|
"eval_f1": 0.9694168151938519, |
|
"eval_loss": 0.10418598353862762, |
|
"eval_precision": 0.9688115284726, |
|
"eval_recall": 0.9700228587208322, |
|
"eval_runtime": 8.3843, |
|
"eval_samples_per_second": 824.159, |
|
"eval_steps_per_second": 3.22, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 37.99, |
|
"eval_accuracy": 0.9787405348413687, |
|
"eval_f1": 0.9692040580887735, |
|
"eval_loss": 0.10528801381587982, |
|
"eval_precision": 0.9685395840514766, |
|
"eval_recall": 0.9698694444870595, |
|
"eval_runtime": 9.1629, |
|
"eval_samples_per_second": 754.13, |
|
"eval_steps_per_second": 2.947, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 38.99, |
|
"eval_accuracy": 0.97886833445158, |
|
"eval_f1": 0.969450960550726, |
|
"eval_loss": 0.10520931333303452, |
|
"eval_precision": 0.9688567794922085, |
|
"eval_recall": 0.970045870855898, |
|
"eval_runtime": 8.2422, |
|
"eval_samples_per_second": 838.371, |
|
"eval_steps_per_second": 3.276, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"eval_accuracy": 0.9788491645100482, |
|
"eval_f1": 0.9694007796419167, |
|
"eval_loss": 0.1054077297449112, |
|
"eval_precision": 0.9688177562575179, |
|
"eval_recall": 0.969984505162389, |
|
"eval_runtime": 8.347, |
|
"eval_samples_per_second": 827.841, |
|
"eval_steps_per_second": 3.235, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"step": 2400, |
|
"total_flos": 1.1777248744118362e+17, |
|
"train_loss": 0.17379826227823894, |
|
"train_runtime": 1699.1945, |
|
"train_samples_per_second": 1463.964, |
|
"train_steps_per_second": 1.412 |
|
} |
|
], |
|
"max_steps": 2400, |
|
"num_train_epochs": 40, |
|
"total_flos": 1.1777248744118362e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|