|
{ |
|
"best_metric": 0.9894462659525121, |
|
"best_model_checkpoint": "models/pos_final_xlm_de/checkpoint-4480", |
|
"epoch": 39.994174757281556, |
|
"global_step": 5120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.9195868301139938, |
|
"eval_f1": 0.9132772848631359, |
|
"eval_loss": 0.3827908933162689, |
|
"eval_precision": 0.9159339498123846, |
|
"eval_recall": 0.9106359866475313, |
|
"eval_runtime": 21.0552, |
|
"eval_samples_per_second": 695.744, |
|
"eval_steps_per_second": 2.755, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.982432992221421, |
|
"eval_f1": 0.9811226512328548, |
|
"eval_loss": 0.06585212051868439, |
|
"eval_precision": 0.9810370934113413, |
|
"eval_recall": 0.9812082239789405, |
|
"eval_runtime": 20.8303, |
|
"eval_samples_per_second": 703.256, |
|
"eval_steps_per_second": 2.784, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.9865217324598413, |
|
"eval_f1": 0.9856564607376338, |
|
"eval_loss": 0.04468955472111702, |
|
"eval_precision": 0.9856525531240089, |
|
"eval_recall": 0.9856603683822421, |
|
"eval_runtime": 22.0421, |
|
"eval_samples_per_second": 664.592, |
|
"eval_steps_per_second": 2.631, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 4.9800000000000004e-05, |
|
"loss": 0.7525, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_accuracy": 0.9877989843354454, |
|
"eval_f1": 0.9870567994418368, |
|
"eval_loss": 0.038840390741825104, |
|
"eval_precision": 0.986986371960646, |
|
"eval_recall": 0.9871272369746034, |
|
"eval_runtime": 19.2442, |
|
"eval_samples_per_second": 761.216, |
|
"eval_steps_per_second": 3.014, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.9880904892680158, |
|
"eval_f1": 0.987278301765262, |
|
"eval_loss": 0.03728002309799194, |
|
"eval_precision": 0.9870924532264904, |
|
"eval_recall": 0.9874642202998755, |
|
"eval_runtime": 19.7895, |
|
"eval_samples_per_second": 740.24, |
|
"eval_steps_per_second": 2.931, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_accuracy": 0.9888691142852759, |
|
"eval_f1": 0.9881274230760082, |
|
"eval_loss": 0.03543518856167793, |
|
"eval_precision": 0.9880334223857051, |
|
"eval_recall": 0.9882214416543106, |
|
"eval_runtime": 20.2566, |
|
"eval_samples_per_second": 723.172, |
|
"eval_steps_per_second": 2.863, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.9891414412617561, |
|
"eval_f1": 0.9883990811231861, |
|
"eval_loss": 0.03501536697149277, |
|
"eval_precision": 0.988295262217043, |
|
"eval_recall": 0.9885029218436556, |
|
"eval_runtime": 20.0491, |
|
"eval_samples_per_second": 730.658, |
|
"eval_steps_per_second": 2.893, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 4.462121212121213e-05, |
|
"loss": 0.0318, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"eval_accuracy": 0.9890839073934856, |
|
"eval_f1": 0.9884860173195352, |
|
"eval_loss": 0.03539884835481644, |
|
"eval_precision": 0.9884056936962649, |
|
"eval_recall": 0.988566353999001, |
|
"eval_runtime": 19.1691, |
|
"eval_samples_per_second": 764.201, |
|
"eval_steps_per_second": 3.026, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.989383083508492, |
|
"eval_f1": 0.9887981223821485, |
|
"eval_loss": 0.03559485822916031, |
|
"eval_precision": 0.9888118431120679, |
|
"eval_recall": 0.9887844020330006, |
|
"eval_runtime": 19.05, |
|
"eval_samples_per_second": 768.977, |
|
"eval_steps_per_second": 3.045, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_accuracy": 0.9894904800625969, |
|
"eval_f1": 0.9888288531140862, |
|
"eval_loss": 0.036680448800325394, |
|
"eval_precision": 0.9887543801233569, |
|
"eval_recall": 0.9889033373242732, |
|
"eval_runtime": 20.8166, |
|
"eval_samples_per_second": 703.718, |
|
"eval_steps_per_second": 2.786, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_accuracy": 0.9893677411436199, |
|
"eval_f1": 0.9887373190775782, |
|
"eval_loss": 0.03701608628034592, |
|
"eval_precision": 0.9886942043922937, |
|
"eval_recall": 0.9887804375232915, |
|
"eval_runtime": 19.5262, |
|
"eval_samples_per_second": 750.222, |
|
"eval_steps_per_second": 2.97, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 3.9209956709956716e-05, |
|
"loss": 0.0205, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"eval_accuracy": 0.9896170545727918, |
|
"eval_f1": 0.9890034963648904, |
|
"eval_loss": 0.03703853860497475, |
|
"eval_precision": 0.9889094123336214, |
|
"eval_recall": 0.9890975983000182, |
|
"eval_runtime": 18.6632, |
|
"eval_samples_per_second": 784.912, |
|
"eval_steps_per_second": 3.108, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.9894828088801608, |
|
"eval_f1": 0.9888483647175421, |
|
"eval_loss": 0.03879130259156227, |
|
"eval_precision": 0.9888013256059176, |
|
"eval_recall": 0.9888954083048549, |
|
"eval_runtime": 18.564, |
|
"eval_samples_per_second": 789.109, |
|
"eval_steps_per_second": 3.124, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_accuracy": 0.9896592460761902, |
|
"eval_f1": 0.989038521766364, |
|
"eval_loss": 0.03970788046717644, |
|
"eval_precision": 0.9890032348090828, |
|
"eval_recall": 0.9890738112417637, |
|
"eval_runtime": 19.0522, |
|
"eval_samples_per_second": 768.889, |
|
"eval_steps_per_second": 3.044, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_accuracy": 0.9896784240322803, |
|
"eval_f1": 0.989059827026727, |
|
"eval_loss": 0.040303945541381836, |
|
"eval_precision": 0.989069630064266, |
|
"eval_recall": 0.9890500241835092, |
|
"eval_runtime": 18.8982, |
|
"eval_samples_per_second": 775.155, |
|
"eval_steps_per_second": 3.069, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"learning_rate": 3.3798701298701305e-05, |
|
"loss": 0.0146, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"eval_accuracy": 0.9897052731708066, |
|
"eval_f1": 0.9891171188315472, |
|
"eval_loss": 0.04127529263496399, |
|
"eval_precision": 0.9891445699061152, |
|
"eval_recall": 0.9890896692806, |
|
"eval_runtime": 18.6421, |
|
"eval_samples_per_second": 785.8, |
|
"eval_steps_per_second": 3.111, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.989758971447859, |
|
"eval_f1": 0.9890990835617984, |
|
"eval_loss": 0.04227915033698082, |
|
"eval_precision": 0.9891441825426719, |
|
"eval_recall": 0.9890539886932183, |
|
"eval_runtime": 18.5222, |
|
"eval_samples_per_second": 790.89, |
|
"eval_steps_per_second": 3.131, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"eval_accuracy": 0.9897436290829869, |
|
"eval_f1": 0.9891194678073816, |
|
"eval_loss": 0.04291819408535957, |
|
"eval_precision": 0.9891135858167388, |
|
"eval_recall": 0.9891253498679818, |
|
"eval_runtime": 19.3021, |
|
"eval_samples_per_second": 758.935, |
|
"eval_steps_per_second": 3.005, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_accuracy": 0.9899277374614524, |
|
"eval_f1": 0.9893141089045129, |
|
"eval_loss": 0.04433906078338623, |
|
"eval_precision": 0.9892729298062706, |
|
"eval_recall": 0.9893552914311087, |
|
"eval_runtime": 20.0023, |
|
"eval_samples_per_second": 732.367, |
|
"eval_steps_per_second": 2.9, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 19.53, |
|
"learning_rate": 2.838744588744589e-05, |
|
"loss": 0.0103, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"eval_accuracy": 0.9895978766167017, |
|
"eval_f1": 0.9889502105288197, |
|
"eval_loss": 0.04566018655896187, |
|
"eval_precision": 0.9890129497315686, |
|
"eval_recall": 0.9888874792854367, |
|
"eval_runtime": 18.9384, |
|
"eval_samples_per_second": 773.508, |
|
"eval_steps_per_second": 3.063, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_accuracy": 0.9898049985424754, |
|
"eval_f1": 0.9891496101074732, |
|
"eval_loss": 0.04549423232674599, |
|
"eval_precision": 0.9891064774439071, |
|
"eval_recall": 0.9891927465330362, |
|
"eval_runtime": 18.8835, |
|
"eval_samples_per_second": 775.757, |
|
"eval_steps_per_second": 3.071, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 21.99, |
|
"eval_accuracy": 0.9897704782215131, |
|
"eval_f1": 0.9891315336173181, |
|
"eval_loss": 0.04684610292315483, |
|
"eval_precision": 0.989109966739214, |
|
"eval_recall": 0.9891531014359454, |
|
"eval_runtime": 18.9876, |
|
"eval_samples_per_second": 771.504, |
|
"eval_steps_per_second": 3.055, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"eval_accuracy": 0.9898471900458736, |
|
"eval_f1": 0.9891676858093711, |
|
"eval_loss": 0.049145638942718506, |
|
"eval_precision": 0.9891029884528939, |
|
"eval_recall": 0.9892323916301271, |
|
"eval_runtime": 18.5499, |
|
"eval_samples_per_second": 789.708, |
|
"eval_steps_per_second": 3.127, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 23.43, |
|
"learning_rate": 2.2976190476190476e-05, |
|
"loss": 0.0073, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 23.99, |
|
"eval_accuracy": 0.9899622577824145, |
|
"eval_f1": 0.9894013665041952, |
|
"eval_loss": 0.04954079911112785, |
|
"eval_precision": 0.9893562619667725, |
|
"eval_recall": 0.9894464751544176, |
|
"eval_runtime": 19.048, |
|
"eval_samples_per_second": 769.056, |
|
"eval_steps_per_second": 3.045, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_accuracy": 0.9897858205863852, |
|
"eval_f1": 0.9891747989478087, |
|
"eval_loss": 0.05031678453087807, |
|
"eval_precision": 0.9891846031248885, |
|
"eval_recall": 0.9891649949650727, |
|
"eval_runtime": 18.6731, |
|
"eval_samples_per_second": 784.498, |
|
"eval_steps_per_second": 3.106, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 25.99, |
|
"eval_accuracy": 0.9898126697249114, |
|
"eval_f1": 0.9892364414843007, |
|
"eval_loss": 0.05185426026582718, |
|
"eval_precision": 0.9892285979337303, |
|
"eval_recall": 0.9892442851592543, |
|
"eval_runtime": 19.572, |
|
"eval_samples_per_second": 748.468, |
|
"eval_steps_per_second": 2.963, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"eval_accuracy": 0.9898548612283097, |
|
"eval_f1": 0.9892308576661506, |
|
"eval_loss": 0.05215698853135109, |
|
"eval_precision": 0.9891896820331485, |
|
"eval_recall": 0.989272036727218, |
|
"eval_runtime": 19.399, |
|
"eval_samples_per_second": 755.14, |
|
"eval_steps_per_second": 2.99, |
|
"step": 3456 |
|
}, |
|
{ |
|
"epoch": 27.34, |
|
"learning_rate": 1.7564935064935065e-05, |
|
"loss": 0.0052, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 27.99, |
|
"eval_accuracy": 0.9898702035931819, |
|
"eval_f1": 0.9892028504316283, |
|
"eval_loss": 0.05260332301259041, |
|
"eval_precision": 0.9891852033919135, |
|
"eval_recall": 0.9892204981009999, |
|
"eval_runtime": 19.6509, |
|
"eval_samples_per_second": 745.461, |
|
"eval_steps_per_second": 2.952, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_accuracy": 0.9898663680019638, |
|
"eval_f1": 0.9891989716326818, |
|
"eval_loss": 0.05352339521050453, |
|
"eval_precision": 0.9891734819683569, |
|
"eval_recall": 0.989224462610709, |
|
"eval_runtime": 19.4431, |
|
"eval_samples_per_second": 753.429, |
|
"eval_steps_per_second": 2.983, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"eval_accuracy": 0.9900121204682489, |
|
"eval_f1": 0.9893592137553174, |
|
"eval_loss": 0.054358094930648804, |
|
"eval_precision": 0.9893631361106265, |
|
"eval_recall": 0.9893552914311087, |
|
"eval_runtime": 18.4688, |
|
"eval_samples_per_second": 793.177, |
|
"eval_steps_per_second": 3.14, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 30.99, |
|
"eval_accuracy": 0.9899660933736326, |
|
"eval_f1": 0.9893714865647028, |
|
"eval_loss": 0.05478381738066673, |
|
"eval_precision": 0.989340109572098, |
|
"eval_recall": 0.9894028655476177, |
|
"eval_runtime": 18.761, |
|
"eval_samples_per_second": 780.821, |
|
"eval_steps_per_second": 3.092, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"learning_rate": 1.2153679653679655e-05, |
|
"loss": 0.0038, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 31.99, |
|
"eval_accuracy": 0.9898702035931819, |
|
"eval_f1": 0.989232477006026, |
|
"eval_loss": 0.05625994876027107, |
|
"eval_precision": 0.9892246334868896, |
|
"eval_recall": 0.9892403206495453, |
|
"eval_runtime": 19.3617, |
|
"eval_samples_per_second": 756.597, |
|
"eval_steps_per_second": 2.996, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 32.99, |
|
"eval_accuracy": 0.9900466407892112, |
|
"eval_f1": 0.9894185977362381, |
|
"eval_loss": 0.05615779384970665, |
|
"eval_precision": 0.9894303656950744, |
|
"eval_recall": 0.9894068300573268, |
|
"eval_runtime": 18.6932, |
|
"eval_samples_per_second": 783.655, |
|
"eval_steps_per_second": 3.103, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 33.99, |
|
"eval_accuracy": 0.9898318476810015, |
|
"eval_f1": 0.989171306638546, |
|
"eval_loss": 0.057734012603759766, |
|
"eval_precision": 0.9891379754613387, |
|
"eval_recall": 0.9892046400621635, |
|
"eval_runtime": 18.8622, |
|
"eval_samples_per_second": 776.632, |
|
"eval_steps_per_second": 3.075, |
|
"step": 4352 |
|
}, |
|
{ |
|
"epoch": 34.99, |
|
"eval_accuracy": 0.9900658187453014, |
|
"eval_f1": 0.9894462659525121, |
|
"eval_loss": 0.05798300728201866, |
|
"eval_precision": 0.989465880076756, |
|
"eval_recall": 0.9894266526058723, |
|
"eval_runtime": 19.4979, |
|
"eval_samples_per_second": 751.311, |
|
"eval_steps_per_second": 2.975, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 35.16, |
|
"learning_rate": 6.742424242424243e-06, |
|
"loss": 0.003, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 35.99, |
|
"eval_accuracy": 0.9899776001472868, |
|
"eval_f1": 0.9893636842960725, |
|
"eval_loss": 0.05809687077999115, |
|
"eval_precision": 0.9893205422976294, |
|
"eval_recall": 0.9894068300573268, |
|
"eval_runtime": 19.0851, |
|
"eval_samples_per_second": 767.563, |
|
"eval_steps_per_second": 3.039, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 36.99, |
|
"eval_accuracy": 0.989889381549272, |
|
"eval_f1": 0.9892901530063094, |
|
"eval_loss": 0.058496102690696716, |
|
"eval_precision": 0.9892646609924242, |
|
"eval_recall": 0.9893156463340179, |
|
"eval_runtime": 19.0831, |
|
"eval_samples_per_second": 767.642, |
|
"eval_steps_per_second": 3.039, |
|
"step": 4736 |
|
}, |
|
{ |
|
"epoch": 37.99, |
|
"eval_accuracy": 0.9899699289648506, |
|
"eval_f1": 0.9893475735699306, |
|
"eval_loss": 0.05856472626328468, |
|
"eval_precision": 0.9893279629570898, |
|
"eval_recall": 0.989367184960236, |
|
"eval_runtime": 19.3383, |
|
"eval_samples_per_second": 757.512, |
|
"eval_steps_per_second": 2.999, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 38.99, |
|
"eval_accuracy": 0.9899737645560687, |
|
"eval_f1": 0.9893515802159814, |
|
"eval_loss": 0.05881791561841965, |
|
"eval_precision": 0.9893280475718533, |
|
"eval_recall": 0.9893751139796542, |
|
"eval_runtime": 19.2023, |
|
"eval_samples_per_second": 762.879, |
|
"eval_steps_per_second": 3.02, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 39.06, |
|
"learning_rate": 1.3311688311688312e-06, |
|
"loss": 0.0024, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"eval_accuracy": 0.9899891069209408, |
|
"eval_f1": 0.9893752824668374, |
|
"eval_loss": 0.058905407786369324, |
|
"eval_precision": 0.9893595934127796, |
|
"eval_recall": 0.9893909720184905, |
|
"eval_runtime": 18.6772, |
|
"eval_samples_per_second": 784.326, |
|
"eval_steps_per_second": 3.105, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"step": 5120, |
|
"total_flos": 3.2246769193641984e+17, |
|
"train_loss": 0.08320926361484453, |
|
"train_runtime": 4249.1875, |
|
"train_samples_per_second": 1241.018, |
|
"train_steps_per_second": 1.205 |
|
} |
|
], |
|
"max_steps": 5120, |
|
"num_train_epochs": 40, |
|
"total_flos": 3.2246769193641984e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|