{ "best_metric": 0.9894462659525121, "best_model_checkpoint": "models/pos_final_xlm_de/checkpoint-4480", "epoch": 39.994174757281556, "global_step": 5120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "eval_accuracy": 0.9195868301139938, "eval_f1": 0.9132772848631359, "eval_loss": 0.3827908933162689, "eval_precision": 0.9159339498123846, "eval_recall": 0.9106359866475313, "eval_runtime": 21.0552, "eval_samples_per_second": 695.744, "eval_steps_per_second": 2.755, "step": 128 }, { "epoch": 1.99, "eval_accuracy": 0.982432992221421, "eval_f1": 0.9811226512328548, "eval_loss": 0.06585212051868439, "eval_precision": 0.9810370934113413, "eval_recall": 0.9812082239789405, "eval_runtime": 20.8303, "eval_samples_per_second": 703.256, "eval_steps_per_second": 2.784, "step": 256 }, { "epoch": 2.99, "eval_accuracy": 0.9865217324598413, "eval_f1": 0.9856564607376338, "eval_loss": 0.04468955472111702, "eval_precision": 0.9856525531240089, "eval_recall": 0.9856603683822421, "eval_runtime": 22.0421, "eval_samples_per_second": 664.592, "eval_steps_per_second": 2.631, "step": 384 }, { "epoch": 3.9, "learning_rate": 4.9800000000000004e-05, "loss": 0.7525, "step": 500 }, { "epoch": 3.99, "eval_accuracy": 0.9877989843354454, "eval_f1": 0.9870567994418368, "eval_loss": 0.038840390741825104, "eval_precision": 0.986986371960646, "eval_recall": 0.9871272369746034, "eval_runtime": 19.2442, "eval_samples_per_second": 761.216, "eval_steps_per_second": 3.014, "step": 512 }, { "epoch": 4.99, "eval_accuracy": 0.9880904892680158, "eval_f1": 0.987278301765262, "eval_loss": 0.03728002309799194, "eval_precision": 0.9870924532264904, "eval_recall": 0.9874642202998755, "eval_runtime": 19.7895, "eval_samples_per_second": 740.24, "eval_steps_per_second": 2.931, "step": 640 }, { "epoch": 5.99, "eval_accuracy": 0.9888691142852759, "eval_f1": 0.9881274230760082, "eval_loss": 0.03543518856167793, "eval_precision": 0.9880334223857051, "eval_recall": 0.9882214416543106, "eval_runtime": 20.2566, "eval_samples_per_second": 723.172, "eval_steps_per_second": 2.863, "step": 768 }, { "epoch": 6.99, "eval_accuracy": 0.9891414412617561, "eval_f1": 0.9883990811231861, "eval_loss": 0.03501536697149277, "eval_precision": 0.988295262217043, "eval_recall": 0.9885029218436556, "eval_runtime": 20.0491, "eval_samples_per_second": 730.658, "eval_steps_per_second": 2.893, "step": 896 }, { "epoch": 7.81, "learning_rate": 4.462121212121213e-05, "loss": 0.0318, "step": 1000 }, { "epoch": 7.99, "eval_accuracy": 0.9890839073934856, "eval_f1": 0.9884860173195352, "eval_loss": 0.03539884835481644, "eval_precision": 0.9884056936962649, "eval_recall": 0.988566353999001, "eval_runtime": 19.1691, "eval_samples_per_second": 764.201, "eval_steps_per_second": 3.026, "step": 1024 }, { "epoch": 8.99, "eval_accuracy": 0.989383083508492, "eval_f1": 0.9887981223821485, "eval_loss": 0.03559485822916031, "eval_precision": 0.9888118431120679, "eval_recall": 0.9887844020330006, "eval_runtime": 19.05, "eval_samples_per_second": 768.977, "eval_steps_per_second": 3.045, "step": 1152 }, { "epoch": 9.99, "eval_accuracy": 0.9894904800625969, "eval_f1": 0.9888288531140862, "eval_loss": 0.036680448800325394, "eval_precision": 0.9887543801233569, "eval_recall": 0.9889033373242732, "eval_runtime": 20.8166, "eval_samples_per_second": 703.718, "eval_steps_per_second": 2.786, "step": 1280 }, { "epoch": 10.99, "eval_accuracy": 0.9893677411436199, "eval_f1": 0.9887373190775782, "eval_loss": 0.03701608628034592, "eval_precision": 0.9886942043922937, "eval_recall": 0.9887804375232915, "eval_runtime": 19.5262, "eval_samples_per_second": 750.222, "eval_steps_per_second": 2.97, "step": 1408 }, { "epoch": 11.71, "learning_rate": 3.9209956709956716e-05, "loss": 0.0205, "step": 1500 }, { "epoch": 11.99, "eval_accuracy": 0.9896170545727918, "eval_f1": 0.9890034963648904, "eval_loss": 0.03703853860497475, "eval_precision": 0.9889094123336214, "eval_recall": 0.9890975983000182, "eval_runtime": 18.6632, "eval_samples_per_second": 784.912, "eval_steps_per_second": 3.108, "step": 1536 }, { "epoch": 12.99, "eval_accuracy": 0.9894828088801608, "eval_f1": 0.9888483647175421, "eval_loss": 0.03879130259156227, "eval_precision": 0.9888013256059176, "eval_recall": 0.9888954083048549, "eval_runtime": 18.564, "eval_samples_per_second": 789.109, "eval_steps_per_second": 3.124, "step": 1664 }, { "epoch": 13.99, "eval_accuracy": 0.9896592460761902, "eval_f1": 0.989038521766364, "eval_loss": 0.03970788046717644, "eval_precision": 0.9890032348090828, "eval_recall": 0.9890738112417637, "eval_runtime": 19.0522, "eval_samples_per_second": 768.889, "eval_steps_per_second": 3.044, "step": 1792 }, { "epoch": 14.99, "eval_accuracy": 0.9896784240322803, "eval_f1": 0.989059827026727, "eval_loss": 0.040303945541381836, "eval_precision": 0.989069630064266, "eval_recall": 0.9890500241835092, "eval_runtime": 18.8982, "eval_samples_per_second": 775.155, "eval_steps_per_second": 3.069, "step": 1920 }, { "epoch": 15.62, "learning_rate": 3.3798701298701305e-05, "loss": 0.0146, "step": 2000 }, { "epoch": 15.99, "eval_accuracy": 0.9897052731708066, "eval_f1": 0.9891171188315472, "eval_loss": 0.04127529263496399, "eval_precision": 0.9891445699061152, "eval_recall": 0.9890896692806, "eval_runtime": 18.6421, "eval_samples_per_second": 785.8, "eval_steps_per_second": 3.111, "step": 2048 }, { "epoch": 16.99, "eval_accuracy": 0.989758971447859, "eval_f1": 0.9890990835617984, "eval_loss": 0.04227915033698082, "eval_precision": 0.9891441825426719, "eval_recall": 0.9890539886932183, "eval_runtime": 18.5222, "eval_samples_per_second": 790.89, "eval_steps_per_second": 3.131, "step": 2176 }, { "epoch": 17.99, "eval_accuracy": 0.9897436290829869, "eval_f1": 0.9891194678073816, "eval_loss": 0.04291819408535957, "eval_precision": 0.9891135858167388, "eval_recall": 0.9891253498679818, "eval_runtime": 19.3021, "eval_samples_per_second": 758.935, "eval_steps_per_second": 3.005, "step": 2304 }, { "epoch": 18.99, "eval_accuracy": 0.9899277374614524, "eval_f1": 0.9893141089045129, "eval_loss": 0.04433906078338623, "eval_precision": 0.9892729298062706, "eval_recall": 0.9893552914311087, "eval_runtime": 20.0023, "eval_samples_per_second": 732.367, "eval_steps_per_second": 2.9, "step": 2432 }, { "epoch": 19.53, "learning_rate": 2.838744588744589e-05, "loss": 0.0103, "step": 2500 }, { "epoch": 19.99, "eval_accuracy": 0.9895978766167017, "eval_f1": 0.9889502105288197, "eval_loss": 0.04566018655896187, "eval_precision": 0.9890129497315686, "eval_recall": 0.9888874792854367, "eval_runtime": 18.9384, "eval_samples_per_second": 773.508, "eval_steps_per_second": 3.063, "step": 2560 }, { "epoch": 20.99, "eval_accuracy": 0.9898049985424754, "eval_f1": 0.9891496101074732, "eval_loss": 0.04549423232674599, "eval_precision": 0.9891064774439071, "eval_recall": 0.9891927465330362, "eval_runtime": 18.8835, "eval_samples_per_second": 775.757, "eval_steps_per_second": 3.071, "step": 2688 }, { "epoch": 21.99, "eval_accuracy": 0.9897704782215131, "eval_f1": 0.9891315336173181, "eval_loss": 0.04684610292315483, "eval_precision": 0.989109966739214, "eval_recall": 0.9891531014359454, "eval_runtime": 18.9876, "eval_samples_per_second": 771.504, "eval_steps_per_second": 3.055, "step": 2816 }, { "epoch": 22.99, "eval_accuracy": 0.9898471900458736, "eval_f1": 0.9891676858093711, "eval_loss": 0.049145638942718506, "eval_precision": 0.9891029884528939, "eval_recall": 0.9892323916301271, "eval_runtime": 18.5499, "eval_samples_per_second": 789.708, "eval_steps_per_second": 3.127, "step": 2944 }, { "epoch": 23.43, "learning_rate": 2.2976190476190476e-05, "loss": 0.0073, "step": 3000 }, { "epoch": 23.99, "eval_accuracy": 0.9899622577824145, "eval_f1": 0.9894013665041952, "eval_loss": 0.04954079911112785, "eval_precision": 0.9893562619667725, "eval_recall": 0.9894464751544176, "eval_runtime": 19.048, "eval_samples_per_second": 769.056, "eval_steps_per_second": 3.045, "step": 3072 }, { "epoch": 24.99, "eval_accuracy": 0.9897858205863852, "eval_f1": 0.9891747989478087, "eval_loss": 0.05031678453087807, "eval_precision": 0.9891846031248885, "eval_recall": 0.9891649949650727, "eval_runtime": 18.6731, "eval_samples_per_second": 784.498, "eval_steps_per_second": 3.106, "step": 3200 }, { "epoch": 25.99, "eval_accuracy": 0.9898126697249114, "eval_f1": 0.9892364414843007, "eval_loss": 0.05185426026582718, "eval_precision": 0.9892285979337303, "eval_recall": 0.9892442851592543, "eval_runtime": 19.572, "eval_samples_per_second": 748.468, "eval_steps_per_second": 2.963, "step": 3328 }, { "epoch": 26.99, "eval_accuracy": 0.9898548612283097, "eval_f1": 0.9892308576661506, "eval_loss": 0.05215698853135109, "eval_precision": 0.9891896820331485, "eval_recall": 0.989272036727218, "eval_runtime": 19.399, "eval_samples_per_second": 755.14, "eval_steps_per_second": 2.99, "step": 3456 }, { "epoch": 27.34, "learning_rate": 1.7564935064935065e-05, "loss": 0.0052, "step": 3500 }, { "epoch": 27.99, "eval_accuracy": 0.9898702035931819, "eval_f1": 0.9892028504316283, "eval_loss": 0.05260332301259041, "eval_precision": 0.9891852033919135, "eval_recall": 0.9892204981009999, "eval_runtime": 19.6509, "eval_samples_per_second": 745.461, "eval_steps_per_second": 2.952, "step": 3584 }, { "epoch": 28.99, "eval_accuracy": 0.9898663680019638, "eval_f1": 0.9891989716326818, "eval_loss": 0.05352339521050453, "eval_precision": 0.9891734819683569, "eval_recall": 0.989224462610709, "eval_runtime": 19.4431, "eval_samples_per_second": 753.429, "eval_steps_per_second": 2.983, "step": 3712 }, { "epoch": 29.99, "eval_accuracy": 0.9900121204682489, "eval_f1": 0.9893592137553174, "eval_loss": 0.054358094930648804, "eval_precision": 0.9893631361106265, "eval_recall": 0.9893552914311087, "eval_runtime": 18.4688, "eval_samples_per_second": 793.177, "eval_steps_per_second": 3.14, "step": 3840 }, { "epoch": 30.99, "eval_accuracy": 0.9899660933736326, "eval_f1": 0.9893714865647028, "eval_loss": 0.05478381738066673, "eval_precision": 0.989340109572098, "eval_recall": 0.9894028655476177, "eval_runtime": 18.761, "eval_samples_per_second": 780.821, "eval_steps_per_second": 3.092, "step": 3968 }, { "epoch": 31.25, "learning_rate": 1.2153679653679655e-05, "loss": 0.0038, "step": 4000 }, { "epoch": 31.99, "eval_accuracy": 0.9898702035931819, "eval_f1": 0.989232477006026, "eval_loss": 0.05625994876027107, "eval_precision": 0.9892246334868896, "eval_recall": 0.9892403206495453, "eval_runtime": 19.3617, "eval_samples_per_second": 756.597, "eval_steps_per_second": 2.996, "step": 4096 }, { "epoch": 32.99, "eval_accuracy": 0.9900466407892112, "eval_f1": 0.9894185977362381, "eval_loss": 0.05615779384970665, "eval_precision": 0.9894303656950744, "eval_recall": 0.9894068300573268, "eval_runtime": 18.6932, "eval_samples_per_second": 783.655, "eval_steps_per_second": 3.103, "step": 4224 }, { "epoch": 33.99, "eval_accuracy": 0.9898318476810015, "eval_f1": 0.989171306638546, "eval_loss": 0.057734012603759766, "eval_precision": 0.9891379754613387, "eval_recall": 0.9892046400621635, "eval_runtime": 18.8622, "eval_samples_per_second": 776.632, "eval_steps_per_second": 3.075, "step": 4352 }, { "epoch": 34.99, "eval_accuracy": 0.9900658187453014, "eval_f1": 0.9894462659525121, "eval_loss": 0.05798300728201866, "eval_precision": 0.989465880076756, "eval_recall": 0.9894266526058723, "eval_runtime": 19.4979, "eval_samples_per_second": 751.311, "eval_steps_per_second": 2.975, "step": 4480 }, { "epoch": 35.16, "learning_rate": 6.742424242424243e-06, "loss": 0.003, "step": 4500 }, { "epoch": 35.99, "eval_accuracy": 0.9899776001472868, "eval_f1": 0.9893636842960725, "eval_loss": 0.05809687077999115, "eval_precision": 0.9893205422976294, "eval_recall": 0.9894068300573268, "eval_runtime": 19.0851, "eval_samples_per_second": 767.563, "eval_steps_per_second": 3.039, "step": 4608 }, { "epoch": 36.99, "eval_accuracy": 0.989889381549272, "eval_f1": 0.9892901530063094, "eval_loss": 0.058496102690696716, "eval_precision": 0.9892646609924242, "eval_recall": 0.9893156463340179, "eval_runtime": 19.0831, "eval_samples_per_second": 767.642, "eval_steps_per_second": 3.039, "step": 4736 }, { "epoch": 37.99, "eval_accuracy": 0.9899699289648506, "eval_f1": 0.9893475735699306, "eval_loss": 0.05856472626328468, "eval_precision": 0.9893279629570898, "eval_recall": 0.989367184960236, "eval_runtime": 19.3383, "eval_samples_per_second": 757.512, "eval_steps_per_second": 2.999, "step": 4864 }, { "epoch": 38.99, "eval_accuracy": 0.9899737645560687, "eval_f1": 0.9893515802159814, "eval_loss": 0.05881791561841965, "eval_precision": 0.9893280475718533, "eval_recall": 0.9893751139796542, "eval_runtime": 19.2023, "eval_samples_per_second": 762.879, "eval_steps_per_second": 3.02, "step": 4992 }, { "epoch": 39.06, "learning_rate": 1.3311688311688312e-06, "loss": 0.0024, "step": 5000 }, { "epoch": 39.99, "eval_accuracy": 0.9899891069209408, "eval_f1": 0.9893752824668374, "eval_loss": 0.058905407786369324, "eval_precision": 0.9893595934127796, "eval_recall": 0.9893909720184905, "eval_runtime": 18.6772, "eval_samples_per_second": 784.326, "eval_steps_per_second": 3.105, "step": 5120 }, { "epoch": 39.99, "step": 5120, "total_flos": 3.2246769193641984e+17, "train_loss": 0.08320926361484453, "train_runtime": 4249.1875, "train_samples_per_second": 1241.018, "train_steps_per_second": 1.205 } ], "max_steps": 5120, "num_train_epochs": 40, "total_flos": 3.2246769193641984e+17, "trial_name": null, "trial_params": null }