xlmr-ner-slavic / trainer_state.json
ivlcic's picture
Upload 13 files
59e509a verified
raw
history blame contribute delete
No virus
19.9 kB
{
"best_metric": 0.9324748546966902,
"best_model_checkpoint": "/home/nikola/projects/neuroticla/result/ner/xlmrb-sl_hr_sr_bs_mk_sq_cs_bg_pl_ru_sk_uk/checkpoint-433760",
"epoch": 40.0,
"eval_steps": 500,
"global_step": 433760,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 1.95e-05,
"loss": 0.1128,
"step": 10844
},
{
"epoch": 1.0,
"eval_accuracy": 0.9769370487250145,
"eval_f1": 0.9010081585905575,
"eval_loss": 0.09421534836292267,
"eval_precision": 0.9111733920553665,
"eval_recall": 0.8910672334713056,
"eval_runtime": 89.8183,
"eval_samples_per_second": 301.776,
"eval_steps_per_second": 15.097,
"step": 10844
},
{
"epoch": 2.0,
"learning_rate": 1.9e-05,
"loss": 0.0676,
"step": 21688
},
{
"epoch": 2.0,
"eval_accuracy": 0.9799811843427059,
"eval_f1": 0.9143064683983713,
"eval_loss": 0.08378221094608307,
"eval_precision": 0.9133559397719819,
"eval_recall": 0.9152589775142633,
"eval_runtime": 89.3428,
"eval_samples_per_second": 303.382,
"eval_steps_per_second": 15.177,
"step": 21688
},
{
"epoch": 3.0,
"learning_rate": 1.8500000000000002e-05,
"loss": 0.0507,
"step": 32532
},
{
"epoch": 3.0,
"eval_accuracy": 0.9804305567434127,
"eval_f1": 0.9163790332723132,
"eval_loss": 0.08407577127218246,
"eval_precision": 0.9157519899455383,
"eval_recall": 0.9170069358988702,
"eval_runtime": 90.9562,
"eval_samples_per_second": 298.001,
"eval_steps_per_second": 14.908,
"step": 32532
},
{
"epoch": 4.0,
"learning_rate": 1.8e-05,
"loss": 0.0401,
"step": 43376
},
{
"epoch": 4.0,
"eval_accuracy": 0.9814757099721534,
"eval_f1": 0.9221617781244994,
"eval_loss": 0.08777288347482681,
"eval_precision": 0.9186871140101311,
"eval_recall": 0.9256628258194429,
"eval_runtime": 89.4976,
"eval_samples_per_second": 302.857,
"eval_steps_per_second": 15.151,
"step": 43376
},
{
"epoch": 5.0,
"learning_rate": 1.7500000000000002e-05,
"loss": 0.032,
"step": 54220
},
{
"epoch": 5.0,
"eval_accuracy": 0.9822106512855675,
"eval_f1": 0.9250753982350196,
"eval_loss": 0.09507809579372406,
"eval_precision": 0.921564273720146,
"eval_recall": 0.9286133795726591,
"eval_runtime": 90.924,
"eval_samples_per_second": 298.106,
"eval_steps_per_second": 14.914,
"step": 54220
},
{
"epoch": 6.0,
"learning_rate": 1.7e-05,
"loss": 0.0264,
"step": 65064
},
{
"epoch": 6.0,
"eval_accuracy": 0.9809553077403671,
"eval_f1": 0.9212521094420008,
"eval_loss": 0.1004280373454094,
"eval_precision": 0.9113936176035908,
"eval_recall": 0.9313262109855689,
"eval_runtime": 90.6899,
"eval_samples_per_second": 298.875,
"eval_steps_per_second": 14.952,
"step": 65064
},
{
"epoch": 7.0,
"learning_rate": 1.65e-05,
"loss": 0.0215,
"step": 75908
},
{
"epoch": 7.0,
"eval_accuracy": 0.9824875226679385,
"eval_f1": 0.92642423737521,
"eval_loss": 0.1009296178817749,
"eval_precision": 0.9199227905694195,
"eval_recall": 0.9330182347018682,
"eval_runtime": 90.3482,
"eval_samples_per_second": 300.006,
"eval_steps_per_second": 15.009,
"step": 75908
},
{
"epoch": 8.0,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.0177,
"step": 86752
},
{
"epoch": 8.0,
"eval_accuracy": 0.9821120792750898,
"eval_f1": 0.9245811025699784,
"eval_loss": 0.11745402961969376,
"eval_precision": 0.9183000675890037,
"eval_recall": 0.9309486519744938,
"eval_runtime": 89.2897,
"eval_samples_per_second": 303.563,
"eval_steps_per_second": 15.187,
"step": 86752
},
{
"epoch": 9.0,
"learning_rate": 1.55e-05,
"loss": 0.015,
"step": 97596
},
{
"epoch": 9.0,
"eval_accuracy": 0.9825324599080091,
"eval_f1": 0.926065441605738,
"eval_loss": 0.11713194102048874,
"eval_precision": 0.9205632012380479,
"eval_recall": 0.9316338516612597,
"eval_runtime": 89.7586,
"eval_samples_per_second": 301.977,
"eval_steps_per_second": 15.107,
"step": 97596
},
{
"epoch": 10.0,
"learning_rate": 1.5000000000000002e-05,
"loss": 0.0133,
"step": 108440
},
{
"epoch": 10.0,
"eval_accuracy": 0.9820569949162935,
"eval_f1": 0.9250865136228494,
"eval_loss": 0.13051354885101318,
"eval_precision": 0.9212697796330453,
"eval_recall": 0.9289350039154268,
"eval_runtime": 89.1452,
"eval_samples_per_second": 304.054,
"eval_steps_per_second": 15.211,
"step": 108440
},
{
"epoch": 11.0,
"learning_rate": 1.45e-05,
"loss": 0.0112,
"step": 119284
},
{
"epoch": 11.0,
"eval_accuracy": 0.9825614516757967,
"eval_f1": 0.9272675473035418,
"eval_loss": 0.1286834180355072,
"eval_precision": 0.9215873147419164,
"eval_recall": 0.9330182347018682,
"eval_runtime": 89.577,
"eval_samples_per_second": 302.589,
"eval_steps_per_second": 15.138,
"step": 119284
},
{
"epoch": 12.0,
"learning_rate": 1.4e-05,
"loss": 0.01,
"step": 130128
},
{
"epoch": 12.0,
"eval_accuracy": 0.982439686251089,
"eval_f1": 0.9262543854764159,
"eval_loss": 0.13614533841609955,
"eval_precision": 0.9221972721224219,
"eval_recall": 0.9303473542901891,
"eval_runtime": 88.3792,
"eval_samples_per_second": 306.69,
"eval_steps_per_second": 15.343,
"step": 130128
},
{
"epoch": 13.0,
"learning_rate": 1.3500000000000001e-05,
"loss": 0.0086,
"step": 140972
},
{
"epoch": 13.0,
"eval_accuracy": 0.9824309887207527,
"eval_f1": 0.9259202794101342,
"eval_loss": 0.13529813289642334,
"eval_precision": 0.9177794263105836,
"eval_recall": 0.9342068464034008,
"eval_runtime": 88.5532,
"eval_samples_per_second": 306.087,
"eval_steps_per_second": 15.313,
"step": 140972
},
{
"epoch": 14.0,
"learning_rate": 1.3000000000000001e-05,
"loss": 0.0079,
"step": 151816
},
{
"epoch": 14.0,
"eval_accuracy": 0.9827180072218493,
"eval_f1": 0.9273193405173312,
"eval_loss": 0.14126552641391754,
"eval_precision": 0.9215123103053067,
"eval_recall": 0.9332000223738673,
"eval_runtime": 89.1586,
"eval_samples_per_second": 304.009,
"eval_steps_per_second": 15.209,
"step": 151816
},
{
"epoch": 15.0,
"learning_rate": 1.25e-05,
"loss": 0.007,
"step": 162660
},
{
"epoch": 15.0,
"eval_accuracy": 0.982528111142841,
"eval_f1": 0.9268309701909121,
"eval_loss": 0.1539030820131302,
"eval_precision": 0.9249721448467967,
"eval_recall": 0.9286972815751202,
"eval_runtime": 88.721,
"eval_samples_per_second": 305.508,
"eval_steps_per_second": 15.284,
"step": 162660
},
{
"epoch": 16.0,
"learning_rate": 1.2e-05,
"loss": 0.0064,
"step": 173504
},
{
"epoch": 16.0,
"eval_accuracy": 0.982581745913248,
"eval_f1": 0.9264010897364617,
"eval_loss": 0.15460434556007385,
"eval_precision": 0.9208715596330275,
"eval_recall": 0.9319974270052579,
"eval_runtime": 88.6075,
"eval_samples_per_second": 305.9,
"eval_steps_per_second": 15.303,
"step": 173504
},
{
"epoch": 17.0,
"learning_rate": 1.15e-05,
"loss": 0.0057,
"step": 184348
},
{
"epoch": 17.0,
"eval_accuracy": 0.982916600831194,
"eval_f1": 0.9276149465203501,
"eval_loss": 0.1507822722196579,
"eval_precision": 0.9214963846111387,
"eval_recall": 0.9338153037252489,
"eval_runtime": 89.1007,
"eval_samples_per_second": 304.206,
"eval_steps_per_second": 15.219,
"step": 184348
},
{
"epoch": 18.0,
"learning_rate": 1.1000000000000001e-05,
"loss": 0.0053,
"step": 195192
},
{
"epoch": 18.0,
"eval_accuracy": 0.9827933858180969,
"eval_f1": 0.927893329629501,
"eval_loss": 0.15079163014888763,
"eval_precision": 0.9216781866092739,
"eval_recall": 0.934192862736324,
"eval_runtime": 90.146,
"eval_samples_per_second": 300.679,
"eval_steps_per_second": 15.042,
"step": 195192
},
{
"epoch": 19.0,
"learning_rate": 1.0500000000000001e-05,
"loss": 0.0048,
"step": 206036
},
{
"epoch": 19.0,
"eval_accuracy": 0.9825150648473366,
"eval_f1": 0.9270733168889446,
"eval_loss": 0.16124233603477478,
"eval_precision": 0.9239187800338879,
"eval_recall": 0.930249468620651,
"eval_runtime": 89.3088,
"eval_samples_per_second": 303.497,
"eval_steps_per_second": 15.183,
"step": 206036
},
{
"epoch": 20.0,
"learning_rate": 1e-05,
"loss": 0.0043,
"step": 216880
},
{
"epoch": 20.0,
"eval_accuracy": 0.9827440998128582,
"eval_f1": 0.9272743748997957,
"eval_loss": 0.1557987779378891,
"eval_precision": 0.9244839808186809,
"eval_recall": 0.9300816646157288,
"eval_runtime": 88.3664,
"eval_samples_per_second": 306.734,
"eval_steps_per_second": 15.345,
"step": 216880
},
{
"epoch": 21.0,
"learning_rate": 9.5e-06,
"loss": 0.0041,
"step": 227724
},
{
"epoch": 21.0,
"eval_accuracy": 0.9828412222349464,
"eval_f1": 0.92726855170398,
"eval_loss": 0.1576606184244156,
"eval_precision": 0.9229714190715008,
"eval_recall": 0.9316058843271059,
"eval_runtime": 88.9473,
"eval_samples_per_second": 304.731,
"eval_steps_per_second": 15.245,
"step": 227724
},
{
"epoch": 22.0,
"learning_rate": 9e-06,
"loss": 0.0036,
"step": 238568
},
{
"epoch": 22.0,
"eval_accuracy": 0.9831050473218129,
"eval_f1": 0.9286320918900104,
"eval_loss": 0.16860993206501007,
"eval_precision": 0.9246028445479497,
"eval_recall": 0.9326966103591006,
"eval_runtime": 88.4212,
"eval_samples_per_second": 306.544,
"eval_steps_per_second": 15.336,
"step": 238568
},
{
"epoch": 23.0,
"learning_rate": 8.5e-06,
"loss": 0.0033,
"step": 249412
},
{
"epoch": 23.0,
"eval_accuracy": 0.9828832602982384,
"eval_f1": 0.9276945586288581,
"eval_loss": 0.16733527183532715,
"eval_precision": 0.9244355577772224,
"eval_recall": 0.9309766193086475,
"eval_runtime": 88.9341,
"eval_samples_per_second": 304.776,
"eval_steps_per_second": 15.247,
"step": 249412
},
{
"epoch": 24.0,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0031,
"step": 260256
},
{
"epoch": 24.0,
"eval_accuracy": 0.9835123816592278,
"eval_f1": 0.9298053519496189,
"eval_loss": 0.16969779133796692,
"eval_precision": 0.9274573913043478,
"eval_recall": 0.9321652310101801,
"eval_runtime": 87.785,
"eval_samples_per_second": 308.766,
"eval_steps_per_second": 15.447,
"step": 260256
},
{
"epoch": 25.0,
"learning_rate": 7.500000000000001e-06,
"loss": 0.0029,
"step": 271100
},
{
"epoch": 25.0,
"eval_accuracy": 0.983328283933777,
"eval_f1": 0.9295600197618832,
"eval_loss": 0.16894972324371338,
"eval_precision": 0.9251374672779401,
"eval_recall": 0.9340250587314017,
"eval_runtime": 88.675,
"eval_samples_per_second": 305.667,
"eval_steps_per_second": 15.292,
"step": 271100
},
{
"epoch": 26.0,
"learning_rate": 7e-06,
"loss": 0.0026,
"step": 281944
},
{
"epoch": 26.0,
"eval_accuracy": 0.9831079464985917,
"eval_f1": 0.9281980972530569,
"eval_loss": 0.1714058518409729,
"eval_precision": 0.9239348804987877,
"eval_recall": 0.9325008390200246,
"eval_runtime": 89.606,
"eval_samples_per_second": 302.491,
"eval_steps_per_second": 15.133,
"step": 281944
},
{
"epoch": 27.0,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.0022,
"step": 292788
},
{
"epoch": 27.0,
"eval_accuracy": 0.9831311399128217,
"eval_f1": 0.9291277150061997,
"eval_loss": 0.16884349286556244,
"eval_precision": 0.9257100024985425,
"eval_recall": 0.9325707573554088,
"eval_runtime": 88.5198,
"eval_samples_per_second": 306.203,
"eval_steps_per_second": 15.319,
"step": 292788
},
{
"epoch": 28.0,
"learning_rate": 6e-06,
"loss": 0.002,
"step": 303632
},
{
"epoch": 28.0,
"eval_accuracy": 0.9833819187041839,
"eval_f1": 0.9299846945874496,
"eval_loss": 0.17876744270324707,
"eval_precision": 0.9253752007531705,
"eval_recall": 0.9346403400827833,
"eval_runtime": 88.8069,
"eval_samples_per_second": 305.213,
"eval_steps_per_second": 15.269,
"step": 303632
},
{
"epoch": 29.0,
"learning_rate": 5.500000000000001e-06,
"loss": 0.0019,
"step": 314476
},
{
"epoch": 29.0,
"eval_accuracy": 0.9836268991419886,
"eval_f1": 0.9311766262342112,
"eval_loss": 0.1778053343296051,
"eval_precision": 0.9273667859421375,
"eval_recall": 0.9350178990938584,
"eval_runtime": 89.2258,
"eval_samples_per_second": 303.78,
"eval_steps_per_second": 15.197,
"step": 314476
},
{
"epoch": 30.0,
"learning_rate": 5e-06,
"loss": 0.0018,
"step": 325320
},
{
"epoch": 30.0,
"eval_accuracy": 0.9834732427727146,
"eval_f1": 0.9307018886832106,
"eval_loss": 0.18135882914066315,
"eval_precision": 0.9263569113124429,
"eval_recall": 0.9350878174292426,
"eval_runtime": 88.7746,
"eval_samples_per_second": 305.324,
"eval_steps_per_second": 15.275,
"step": 325320
},
{
"epoch": 31.0,
"learning_rate": 4.5e-06,
"loss": 0.0016,
"step": 336164
},
{
"epoch": 31.0,
"eval_accuracy": 0.983370321997069,
"eval_f1": 0.9299260330348701,
"eval_loss": 0.18739104270935059,
"eval_precision": 0.9246295067462951,
"eval_recall": 0.9352835887683186,
"eval_runtime": 89.214,
"eval_samples_per_second": 303.82,
"eval_steps_per_second": 15.199,
"step": 336164
},
{
"epoch": 32.0,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0014,
"step": 347008
},
{
"epoch": 32.0,
"eval_accuracy": 0.9836066049045373,
"eval_f1": 0.9308149819620296,
"eval_loss": 0.18249443173408508,
"eval_precision": 0.927185947775897,
"eval_recall": 0.9344725360778611,
"eval_runtime": 89.9015,
"eval_samples_per_second": 301.497,
"eval_steps_per_second": 15.083,
"step": 347008
},
{
"epoch": 33.0,
"learning_rate": 3.5e-06,
"loss": 0.0015,
"step": 357852
},
{
"epoch": 33.0,
"eval_accuracy": 0.9833732211738477,
"eval_f1": 0.9303621946290954,
"eval_loss": 0.18371780216693878,
"eval_precision": 0.927196466764812,
"eval_recall": 0.9335496140507887,
"eval_runtime": 89.8085,
"eval_samples_per_second": 301.809,
"eval_steps_per_second": 15.099,
"step": 357852
},
{
"epoch": 34.0,
"learning_rate": 3e-06,
"loss": 0.0013,
"step": 368696
},
{
"epoch": 34.0,
"eval_accuracy": 0.9834457005933165,
"eval_f1": 0.9307165143748607,
"eval_loss": 0.1884261518716812,
"eval_precision": 0.9271006771006771,
"eval_recall": 0.9343606667412462,
"eval_runtime": 90.3295,
"eval_samples_per_second": 300.068,
"eval_steps_per_second": 15.012,
"step": 368696
},
{
"epoch": 35.0,
"learning_rate": 2.5e-06,
"loss": 0.0012,
"step": 379540
},
{
"epoch": 35.0,
"eval_accuracy": 0.9836544413213868,
"eval_f1": 0.9312036572448013,
"eval_loss": 0.18962261080741882,
"eval_precision": 0.9281507001555901,
"eval_recall": 0.9342767647387851,
"eval_runtime": 89.7245,
"eval_samples_per_second": 302.092,
"eval_steps_per_second": 15.113,
"step": 379540
},
{
"epoch": 36.0,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0011,
"step": 390384
},
{
"epoch": 36.0,
"eval_accuracy": 0.9836124032580948,
"eval_f1": 0.9311286323238515,
"eval_loss": 0.18686576187610626,
"eval_precision": 0.9275881210102692,
"eval_recall": 0.9346962747510907,
"eval_runtime": 90.2423,
"eval_samples_per_second": 300.358,
"eval_steps_per_second": 15.026,
"step": 390384
},
{
"epoch": 37.0,
"learning_rate": 1.5e-06,
"loss": 0.001,
"step": 401228
},
{
"epoch": 37.0,
"eval_accuracy": 0.9836442942026612,
"eval_f1": 0.9311170842443909,
"eval_loss": 0.18996329605579376,
"eval_precision": 0.9273587261075817,
"eval_recall": 0.9349060297572436,
"eval_runtime": 88.7307,
"eval_samples_per_second": 305.475,
"eval_steps_per_second": 15.282,
"step": 401228
},
{
"epoch": 38.0,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.001,
"step": 412072
},
{
"epoch": 38.0,
"eval_accuracy": 0.9837356182711919,
"eval_f1": 0.9316876434183994,
"eval_loss": 0.19159720838069916,
"eval_precision": 0.9266231431021605,
"eval_recall": 0.9368078084796957,
"eval_runtime": 88.7508,
"eval_samples_per_second": 305.406,
"eval_steps_per_second": 15.279,
"step": 412072
},
{
"epoch": 39.0,
"learning_rate": 5.000000000000001e-07,
"loss": 0.001,
"step": 422916
},
{
"epoch": 39.0,
"eval_accuracy": 0.9837994001603245,
"eval_f1": 0.9321045231167601,
"eval_loss": 0.1949097365140915,
"eval_precision": 0.927983367983368,
"eval_recall": 0.9362624454636984,
"eval_runtime": 88.5519,
"eval_samples_per_second": 306.092,
"eval_steps_per_second": 15.313,
"step": 422916
},
{
"epoch": 40.0,
"learning_rate": 0.0,
"loss": 0.0009,
"step": 433760
},
{
"epoch": 40.0,
"eval_accuracy": 0.9839153672314747,
"eval_f1": 0.9324748546966902,
"eval_loss": 0.19464968144893646,
"eval_precision": 0.9283328482530179,
"eval_recall": 0.9366539881418503,
"eval_runtime": 88.4748,
"eval_samples_per_second": 306.358,
"eval_steps_per_second": 15.326,
"step": 433760
}
],
"logging_steps": 500,
"max_steps": 433760,
"num_train_epochs": 40,
"save_steps": 500,
"total_flos": 1.1334359572090675e+18,
"trial_name": null,
"trial_params": null
}