lettuce_pos_de_xlm / trainer_state.json
pranaydeeps's picture
Upload folder using huggingface_hub
1f78c04 verified
{
"best_metric": 0.9894462659525121,
"best_model_checkpoint": "models/pos_final_xlm_de/checkpoint-4480",
"epoch": 39.994174757281556,
"global_step": 5120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.99,
"eval_accuracy": 0.9195868301139938,
"eval_f1": 0.9132772848631359,
"eval_loss": 0.3827908933162689,
"eval_precision": 0.9159339498123846,
"eval_recall": 0.9106359866475313,
"eval_runtime": 21.0552,
"eval_samples_per_second": 695.744,
"eval_steps_per_second": 2.755,
"step": 128
},
{
"epoch": 1.99,
"eval_accuracy": 0.982432992221421,
"eval_f1": 0.9811226512328548,
"eval_loss": 0.06585212051868439,
"eval_precision": 0.9810370934113413,
"eval_recall": 0.9812082239789405,
"eval_runtime": 20.8303,
"eval_samples_per_second": 703.256,
"eval_steps_per_second": 2.784,
"step": 256
},
{
"epoch": 2.99,
"eval_accuracy": 0.9865217324598413,
"eval_f1": 0.9856564607376338,
"eval_loss": 0.04468955472111702,
"eval_precision": 0.9856525531240089,
"eval_recall": 0.9856603683822421,
"eval_runtime": 22.0421,
"eval_samples_per_second": 664.592,
"eval_steps_per_second": 2.631,
"step": 384
},
{
"epoch": 3.9,
"learning_rate": 4.9800000000000004e-05,
"loss": 0.7525,
"step": 500
},
{
"epoch": 3.99,
"eval_accuracy": 0.9877989843354454,
"eval_f1": 0.9870567994418368,
"eval_loss": 0.038840390741825104,
"eval_precision": 0.986986371960646,
"eval_recall": 0.9871272369746034,
"eval_runtime": 19.2442,
"eval_samples_per_second": 761.216,
"eval_steps_per_second": 3.014,
"step": 512
},
{
"epoch": 4.99,
"eval_accuracy": 0.9880904892680158,
"eval_f1": 0.987278301765262,
"eval_loss": 0.03728002309799194,
"eval_precision": 0.9870924532264904,
"eval_recall": 0.9874642202998755,
"eval_runtime": 19.7895,
"eval_samples_per_second": 740.24,
"eval_steps_per_second": 2.931,
"step": 640
},
{
"epoch": 5.99,
"eval_accuracy": 0.9888691142852759,
"eval_f1": 0.9881274230760082,
"eval_loss": 0.03543518856167793,
"eval_precision": 0.9880334223857051,
"eval_recall": 0.9882214416543106,
"eval_runtime": 20.2566,
"eval_samples_per_second": 723.172,
"eval_steps_per_second": 2.863,
"step": 768
},
{
"epoch": 6.99,
"eval_accuracy": 0.9891414412617561,
"eval_f1": 0.9883990811231861,
"eval_loss": 0.03501536697149277,
"eval_precision": 0.988295262217043,
"eval_recall": 0.9885029218436556,
"eval_runtime": 20.0491,
"eval_samples_per_second": 730.658,
"eval_steps_per_second": 2.893,
"step": 896
},
{
"epoch": 7.81,
"learning_rate": 4.462121212121213e-05,
"loss": 0.0318,
"step": 1000
},
{
"epoch": 7.99,
"eval_accuracy": 0.9890839073934856,
"eval_f1": 0.9884860173195352,
"eval_loss": 0.03539884835481644,
"eval_precision": 0.9884056936962649,
"eval_recall": 0.988566353999001,
"eval_runtime": 19.1691,
"eval_samples_per_second": 764.201,
"eval_steps_per_second": 3.026,
"step": 1024
},
{
"epoch": 8.99,
"eval_accuracy": 0.989383083508492,
"eval_f1": 0.9887981223821485,
"eval_loss": 0.03559485822916031,
"eval_precision": 0.9888118431120679,
"eval_recall": 0.9887844020330006,
"eval_runtime": 19.05,
"eval_samples_per_second": 768.977,
"eval_steps_per_second": 3.045,
"step": 1152
},
{
"epoch": 9.99,
"eval_accuracy": 0.9894904800625969,
"eval_f1": 0.9888288531140862,
"eval_loss": 0.036680448800325394,
"eval_precision": 0.9887543801233569,
"eval_recall": 0.9889033373242732,
"eval_runtime": 20.8166,
"eval_samples_per_second": 703.718,
"eval_steps_per_second": 2.786,
"step": 1280
},
{
"epoch": 10.99,
"eval_accuracy": 0.9893677411436199,
"eval_f1": 0.9887373190775782,
"eval_loss": 0.03701608628034592,
"eval_precision": 0.9886942043922937,
"eval_recall": 0.9887804375232915,
"eval_runtime": 19.5262,
"eval_samples_per_second": 750.222,
"eval_steps_per_second": 2.97,
"step": 1408
},
{
"epoch": 11.71,
"learning_rate": 3.9209956709956716e-05,
"loss": 0.0205,
"step": 1500
},
{
"epoch": 11.99,
"eval_accuracy": 0.9896170545727918,
"eval_f1": 0.9890034963648904,
"eval_loss": 0.03703853860497475,
"eval_precision": 0.9889094123336214,
"eval_recall": 0.9890975983000182,
"eval_runtime": 18.6632,
"eval_samples_per_second": 784.912,
"eval_steps_per_second": 3.108,
"step": 1536
},
{
"epoch": 12.99,
"eval_accuracy": 0.9894828088801608,
"eval_f1": 0.9888483647175421,
"eval_loss": 0.03879130259156227,
"eval_precision": 0.9888013256059176,
"eval_recall": 0.9888954083048549,
"eval_runtime": 18.564,
"eval_samples_per_second": 789.109,
"eval_steps_per_second": 3.124,
"step": 1664
},
{
"epoch": 13.99,
"eval_accuracy": 0.9896592460761902,
"eval_f1": 0.989038521766364,
"eval_loss": 0.03970788046717644,
"eval_precision": 0.9890032348090828,
"eval_recall": 0.9890738112417637,
"eval_runtime": 19.0522,
"eval_samples_per_second": 768.889,
"eval_steps_per_second": 3.044,
"step": 1792
},
{
"epoch": 14.99,
"eval_accuracy": 0.9896784240322803,
"eval_f1": 0.989059827026727,
"eval_loss": 0.040303945541381836,
"eval_precision": 0.989069630064266,
"eval_recall": 0.9890500241835092,
"eval_runtime": 18.8982,
"eval_samples_per_second": 775.155,
"eval_steps_per_second": 3.069,
"step": 1920
},
{
"epoch": 15.62,
"learning_rate": 3.3798701298701305e-05,
"loss": 0.0146,
"step": 2000
},
{
"epoch": 15.99,
"eval_accuracy": 0.9897052731708066,
"eval_f1": 0.9891171188315472,
"eval_loss": 0.04127529263496399,
"eval_precision": 0.9891445699061152,
"eval_recall": 0.9890896692806,
"eval_runtime": 18.6421,
"eval_samples_per_second": 785.8,
"eval_steps_per_second": 3.111,
"step": 2048
},
{
"epoch": 16.99,
"eval_accuracy": 0.989758971447859,
"eval_f1": 0.9890990835617984,
"eval_loss": 0.04227915033698082,
"eval_precision": 0.9891441825426719,
"eval_recall": 0.9890539886932183,
"eval_runtime": 18.5222,
"eval_samples_per_second": 790.89,
"eval_steps_per_second": 3.131,
"step": 2176
},
{
"epoch": 17.99,
"eval_accuracy": 0.9897436290829869,
"eval_f1": 0.9891194678073816,
"eval_loss": 0.04291819408535957,
"eval_precision": 0.9891135858167388,
"eval_recall": 0.9891253498679818,
"eval_runtime": 19.3021,
"eval_samples_per_second": 758.935,
"eval_steps_per_second": 3.005,
"step": 2304
},
{
"epoch": 18.99,
"eval_accuracy": 0.9899277374614524,
"eval_f1": 0.9893141089045129,
"eval_loss": 0.04433906078338623,
"eval_precision": 0.9892729298062706,
"eval_recall": 0.9893552914311087,
"eval_runtime": 20.0023,
"eval_samples_per_second": 732.367,
"eval_steps_per_second": 2.9,
"step": 2432
},
{
"epoch": 19.53,
"learning_rate": 2.838744588744589e-05,
"loss": 0.0103,
"step": 2500
},
{
"epoch": 19.99,
"eval_accuracy": 0.9895978766167017,
"eval_f1": 0.9889502105288197,
"eval_loss": 0.04566018655896187,
"eval_precision": 0.9890129497315686,
"eval_recall": 0.9888874792854367,
"eval_runtime": 18.9384,
"eval_samples_per_second": 773.508,
"eval_steps_per_second": 3.063,
"step": 2560
},
{
"epoch": 20.99,
"eval_accuracy": 0.9898049985424754,
"eval_f1": 0.9891496101074732,
"eval_loss": 0.04549423232674599,
"eval_precision": 0.9891064774439071,
"eval_recall": 0.9891927465330362,
"eval_runtime": 18.8835,
"eval_samples_per_second": 775.757,
"eval_steps_per_second": 3.071,
"step": 2688
},
{
"epoch": 21.99,
"eval_accuracy": 0.9897704782215131,
"eval_f1": 0.9891315336173181,
"eval_loss": 0.04684610292315483,
"eval_precision": 0.989109966739214,
"eval_recall": 0.9891531014359454,
"eval_runtime": 18.9876,
"eval_samples_per_second": 771.504,
"eval_steps_per_second": 3.055,
"step": 2816
},
{
"epoch": 22.99,
"eval_accuracy": 0.9898471900458736,
"eval_f1": 0.9891676858093711,
"eval_loss": 0.049145638942718506,
"eval_precision": 0.9891029884528939,
"eval_recall": 0.9892323916301271,
"eval_runtime": 18.5499,
"eval_samples_per_second": 789.708,
"eval_steps_per_second": 3.127,
"step": 2944
},
{
"epoch": 23.43,
"learning_rate": 2.2976190476190476e-05,
"loss": 0.0073,
"step": 3000
},
{
"epoch": 23.99,
"eval_accuracy": 0.9899622577824145,
"eval_f1": 0.9894013665041952,
"eval_loss": 0.04954079911112785,
"eval_precision": 0.9893562619667725,
"eval_recall": 0.9894464751544176,
"eval_runtime": 19.048,
"eval_samples_per_second": 769.056,
"eval_steps_per_second": 3.045,
"step": 3072
},
{
"epoch": 24.99,
"eval_accuracy": 0.9897858205863852,
"eval_f1": 0.9891747989478087,
"eval_loss": 0.05031678453087807,
"eval_precision": 0.9891846031248885,
"eval_recall": 0.9891649949650727,
"eval_runtime": 18.6731,
"eval_samples_per_second": 784.498,
"eval_steps_per_second": 3.106,
"step": 3200
},
{
"epoch": 25.99,
"eval_accuracy": 0.9898126697249114,
"eval_f1": 0.9892364414843007,
"eval_loss": 0.05185426026582718,
"eval_precision": 0.9892285979337303,
"eval_recall": 0.9892442851592543,
"eval_runtime": 19.572,
"eval_samples_per_second": 748.468,
"eval_steps_per_second": 2.963,
"step": 3328
},
{
"epoch": 26.99,
"eval_accuracy": 0.9898548612283097,
"eval_f1": 0.9892308576661506,
"eval_loss": 0.05215698853135109,
"eval_precision": 0.9891896820331485,
"eval_recall": 0.989272036727218,
"eval_runtime": 19.399,
"eval_samples_per_second": 755.14,
"eval_steps_per_second": 2.99,
"step": 3456
},
{
"epoch": 27.34,
"learning_rate": 1.7564935064935065e-05,
"loss": 0.0052,
"step": 3500
},
{
"epoch": 27.99,
"eval_accuracy": 0.9898702035931819,
"eval_f1": 0.9892028504316283,
"eval_loss": 0.05260332301259041,
"eval_precision": 0.9891852033919135,
"eval_recall": 0.9892204981009999,
"eval_runtime": 19.6509,
"eval_samples_per_second": 745.461,
"eval_steps_per_second": 2.952,
"step": 3584
},
{
"epoch": 28.99,
"eval_accuracy": 0.9898663680019638,
"eval_f1": 0.9891989716326818,
"eval_loss": 0.05352339521050453,
"eval_precision": 0.9891734819683569,
"eval_recall": 0.989224462610709,
"eval_runtime": 19.4431,
"eval_samples_per_second": 753.429,
"eval_steps_per_second": 2.983,
"step": 3712
},
{
"epoch": 29.99,
"eval_accuracy": 0.9900121204682489,
"eval_f1": 0.9893592137553174,
"eval_loss": 0.054358094930648804,
"eval_precision": 0.9893631361106265,
"eval_recall": 0.9893552914311087,
"eval_runtime": 18.4688,
"eval_samples_per_second": 793.177,
"eval_steps_per_second": 3.14,
"step": 3840
},
{
"epoch": 30.99,
"eval_accuracy": 0.9899660933736326,
"eval_f1": 0.9893714865647028,
"eval_loss": 0.05478381738066673,
"eval_precision": 0.989340109572098,
"eval_recall": 0.9894028655476177,
"eval_runtime": 18.761,
"eval_samples_per_second": 780.821,
"eval_steps_per_second": 3.092,
"step": 3968
},
{
"epoch": 31.25,
"learning_rate": 1.2153679653679655e-05,
"loss": 0.0038,
"step": 4000
},
{
"epoch": 31.99,
"eval_accuracy": 0.9898702035931819,
"eval_f1": 0.989232477006026,
"eval_loss": 0.05625994876027107,
"eval_precision": 0.9892246334868896,
"eval_recall": 0.9892403206495453,
"eval_runtime": 19.3617,
"eval_samples_per_second": 756.597,
"eval_steps_per_second": 2.996,
"step": 4096
},
{
"epoch": 32.99,
"eval_accuracy": 0.9900466407892112,
"eval_f1": 0.9894185977362381,
"eval_loss": 0.05615779384970665,
"eval_precision": 0.9894303656950744,
"eval_recall": 0.9894068300573268,
"eval_runtime": 18.6932,
"eval_samples_per_second": 783.655,
"eval_steps_per_second": 3.103,
"step": 4224
},
{
"epoch": 33.99,
"eval_accuracy": 0.9898318476810015,
"eval_f1": 0.989171306638546,
"eval_loss": 0.057734012603759766,
"eval_precision": 0.9891379754613387,
"eval_recall": 0.9892046400621635,
"eval_runtime": 18.8622,
"eval_samples_per_second": 776.632,
"eval_steps_per_second": 3.075,
"step": 4352
},
{
"epoch": 34.99,
"eval_accuracy": 0.9900658187453014,
"eval_f1": 0.9894462659525121,
"eval_loss": 0.05798300728201866,
"eval_precision": 0.989465880076756,
"eval_recall": 0.9894266526058723,
"eval_runtime": 19.4979,
"eval_samples_per_second": 751.311,
"eval_steps_per_second": 2.975,
"step": 4480
},
{
"epoch": 35.16,
"learning_rate": 6.742424242424243e-06,
"loss": 0.003,
"step": 4500
},
{
"epoch": 35.99,
"eval_accuracy": 0.9899776001472868,
"eval_f1": 0.9893636842960725,
"eval_loss": 0.05809687077999115,
"eval_precision": 0.9893205422976294,
"eval_recall": 0.9894068300573268,
"eval_runtime": 19.0851,
"eval_samples_per_second": 767.563,
"eval_steps_per_second": 3.039,
"step": 4608
},
{
"epoch": 36.99,
"eval_accuracy": 0.989889381549272,
"eval_f1": 0.9892901530063094,
"eval_loss": 0.058496102690696716,
"eval_precision": 0.9892646609924242,
"eval_recall": 0.9893156463340179,
"eval_runtime": 19.0831,
"eval_samples_per_second": 767.642,
"eval_steps_per_second": 3.039,
"step": 4736
},
{
"epoch": 37.99,
"eval_accuracy": 0.9899699289648506,
"eval_f1": 0.9893475735699306,
"eval_loss": 0.05856472626328468,
"eval_precision": 0.9893279629570898,
"eval_recall": 0.989367184960236,
"eval_runtime": 19.3383,
"eval_samples_per_second": 757.512,
"eval_steps_per_second": 2.999,
"step": 4864
},
{
"epoch": 38.99,
"eval_accuracy": 0.9899737645560687,
"eval_f1": 0.9893515802159814,
"eval_loss": 0.05881791561841965,
"eval_precision": 0.9893280475718533,
"eval_recall": 0.9893751139796542,
"eval_runtime": 19.2023,
"eval_samples_per_second": 762.879,
"eval_steps_per_second": 3.02,
"step": 4992
},
{
"epoch": 39.06,
"learning_rate": 1.3311688311688312e-06,
"loss": 0.0024,
"step": 5000
},
{
"epoch": 39.99,
"eval_accuracy": 0.9899891069209408,
"eval_f1": 0.9893752824668374,
"eval_loss": 0.058905407786369324,
"eval_precision": 0.9893595934127796,
"eval_recall": 0.9893909720184905,
"eval_runtime": 18.6772,
"eval_samples_per_second": 784.326,
"eval_steps_per_second": 3.105,
"step": 5120
},
{
"epoch": 39.99,
"step": 5120,
"total_flos": 3.2246769193641984e+17,
"train_loss": 0.08320926361484453,
"train_runtime": 4249.1875,
"train_samples_per_second": 1241.018,
"train_steps_per_second": 1.205
}
],
"max_steps": 5120,
"num_train_epochs": 40,
"total_flos": 3.2246769193641984e+17,
"trial_name": null,
"trial_params": null
}