{ "best_metric": 0.9324748546966902, "best_model_checkpoint": "/home/nikola/projects/neuroticla/result/ner/xlmrb-sl_hr_sr_bs_mk_sq_cs_bg_pl_ru_sk_uk/checkpoint-433760", "epoch": 40.0, "eval_steps": 500, "global_step": 433760, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.95e-05, "loss": 0.1128, "step": 10844 }, { "epoch": 1.0, "eval_accuracy": 0.9769370487250145, "eval_f1": 0.9010081585905575, "eval_loss": 0.09421534836292267, "eval_precision": 0.9111733920553665, "eval_recall": 0.8910672334713056, "eval_runtime": 89.8183, "eval_samples_per_second": 301.776, "eval_steps_per_second": 15.097, "step": 10844 }, { "epoch": 2.0, "learning_rate": 1.9e-05, "loss": 0.0676, "step": 21688 }, { "epoch": 2.0, "eval_accuracy": 0.9799811843427059, "eval_f1": 0.9143064683983713, "eval_loss": 0.08378221094608307, "eval_precision": 0.9133559397719819, "eval_recall": 0.9152589775142633, "eval_runtime": 89.3428, "eval_samples_per_second": 303.382, "eval_steps_per_second": 15.177, "step": 21688 }, { "epoch": 3.0, "learning_rate": 1.8500000000000002e-05, "loss": 0.0507, "step": 32532 }, { "epoch": 3.0, "eval_accuracy": 0.9804305567434127, "eval_f1": 0.9163790332723132, "eval_loss": 0.08407577127218246, "eval_precision": 0.9157519899455383, "eval_recall": 0.9170069358988702, "eval_runtime": 90.9562, "eval_samples_per_second": 298.001, "eval_steps_per_second": 14.908, "step": 32532 }, { "epoch": 4.0, "learning_rate": 1.8e-05, "loss": 0.0401, "step": 43376 }, { "epoch": 4.0, "eval_accuracy": 0.9814757099721534, "eval_f1": 0.9221617781244994, "eval_loss": 0.08777288347482681, "eval_precision": 0.9186871140101311, "eval_recall": 0.9256628258194429, "eval_runtime": 89.4976, "eval_samples_per_second": 302.857, "eval_steps_per_second": 15.151, "step": 43376 }, { "epoch": 5.0, "learning_rate": 1.7500000000000002e-05, "loss": 0.032, "step": 54220 }, { "epoch": 5.0, "eval_accuracy": 0.9822106512855675, "eval_f1": 0.9250753982350196, "eval_loss": 0.09507809579372406, "eval_precision": 0.921564273720146, "eval_recall": 0.9286133795726591, "eval_runtime": 90.924, "eval_samples_per_second": 298.106, "eval_steps_per_second": 14.914, "step": 54220 }, { "epoch": 6.0, "learning_rate": 1.7e-05, "loss": 0.0264, "step": 65064 }, { "epoch": 6.0, "eval_accuracy": 0.9809553077403671, "eval_f1": 0.9212521094420008, "eval_loss": 0.1004280373454094, "eval_precision": 0.9113936176035908, "eval_recall": 0.9313262109855689, "eval_runtime": 90.6899, "eval_samples_per_second": 298.875, "eval_steps_per_second": 14.952, "step": 65064 }, { "epoch": 7.0, "learning_rate": 1.65e-05, "loss": 0.0215, "step": 75908 }, { "epoch": 7.0, "eval_accuracy": 0.9824875226679385, "eval_f1": 0.92642423737521, "eval_loss": 0.1009296178817749, "eval_precision": 0.9199227905694195, "eval_recall": 0.9330182347018682, "eval_runtime": 90.3482, "eval_samples_per_second": 300.006, "eval_steps_per_second": 15.009, "step": 75908 }, { "epoch": 8.0, "learning_rate": 1.6000000000000003e-05, "loss": 0.0177, "step": 86752 }, { "epoch": 8.0, "eval_accuracy": 0.9821120792750898, "eval_f1": 0.9245811025699784, "eval_loss": 0.11745402961969376, "eval_precision": 0.9183000675890037, "eval_recall": 0.9309486519744938, "eval_runtime": 89.2897, "eval_samples_per_second": 303.563, "eval_steps_per_second": 15.187, "step": 86752 }, { "epoch": 9.0, "learning_rate": 1.55e-05, "loss": 0.015, "step": 97596 }, { "epoch": 9.0, "eval_accuracy": 0.9825324599080091, "eval_f1": 0.926065441605738, "eval_loss": 0.11713194102048874, "eval_precision": 0.9205632012380479, "eval_recall": 0.9316338516612597, "eval_runtime": 89.7586, "eval_samples_per_second": 301.977, "eval_steps_per_second": 15.107, "step": 97596 }, { "epoch": 10.0, "learning_rate": 1.5000000000000002e-05, "loss": 0.0133, "step": 108440 }, { "epoch": 10.0, "eval_accuracy": 0.9820569949162935, "eval_f1": 0.9250865136228494, "eval_loss": 0.13051354885101318, "eval_precision": 0.9212697796330453, "eval_recall": 0.9289350039154268, "eval_runtime": 89.1452, "eval_samples_per_second": 304.054, "eval_steps_per_second": 15.211, "step": 108440 }, { "epoch": 11.0, "learning_rate": 1.45e-05, "loss": 0.0112, "step": 119284 }, { "epoch": 11.0, "eval_accuracy": 0.9825614516757967, "eval_f1": 0.9272675473035418, "eval_loss": 0.1286834180355072, "eval_precision": 0.9215873147419164, "eval_recall": 0.9330182347018682, "eval_runtime": 89.577, "eval_samples_per_second": 302.589, "eval_steps_per_second": 15.138, "step": 119284 }, { "epoch": 12.0, "learning_rate": 1.4e-05, "loss": 0.01, "step": 130128 }, { "epoch": 12.0, "eval_accuracy": 0.982439686251089, "eval_f1": 0.9262543854764159, "eval_loss": 0.13614533841609955, "eval_precision": 0.9221972721224219, "eval_recall": 0.9303473542901891, "eval_runtime": 88.3792, "eval_samples_per_second": 306.69, "eval_steps_per_second": 15.343, "step": 130128 }, { "epoch": 13.0, "learning_rate": 1.3500000000000001e-05, "loss": 0.0086, "step": 140972 }, { "epoch": 13.0, "eval_accuracy": 0.9824309887207527, "eval_f1": 0.9259202794101342, "eval_loss": 0.13529813289642334, "eval_precision": 0.9177794263105836, "eval_recall": 0.9342068464034008, "eval_runtime": 88.5532, "eval_samples_per_second": 306.087, "eval_steps_per_second": 15.313, "step": 140972 }, { "epoch": 14.0, "learning_rate": 1.3000000000000001e-05, "loss": 0.0079, "step": 151816 }, { "epoch": 14.0, "eval_accuracy": 0.9827180072218493, "eval_f1": 0.9273193405173312, "eval_loss": 0.14126552641391754, "eval_precision": 0.9215123103053067, "eval_recall": 0.9332000223738673, "eval_runtime": 89.1586, "eval_samples_per_second": 304.009, "eval_steps_per_second": 15.209, "step": 151816 }, { "epoch": 15.0, "learning_rate": 1.25e-05, "loss": 0.007, "step": 162660 }, { "epoch": 15.0, "eval_accuracy": 0.982528111142841, "eval_f1": 0.9268309701909121, "eval_loss": 0.1539030820131302, "eval_precision": 0.9249721448467967, "eval_recall": 0.9286972815751202, "eval_runtime": 88.721, "eval_samples_per_second": 305.508, "eval_steps_per_second": 15.284, "step": 162660 }, { "epoch": 16.0, "learning_rate": 1.2e-05, "loss": 0.0064, "step": 173504 }, { "epoch": 16.0, "eval_accuracy": 0.982581745913248, "eval_f1": 0.9264010897364617, "eval_loss": 0.15460434556007385, "eval_precision": 0.9208715596330275, "eval_recall": 0.9319974270052579, "eval_runtime": 88.6075, "eval_samples_per_second": 305.9, "eval_steps_per_second": 15.303, "step": 173504 }, { "epoch": 17.0, "learning_rate": 1.15e-05, "loss": 0.0057, "step": 184348 }, { "epoch": 17.0, "eval_accuracy": 0.982916600831194, "eval_f1": 0.9276149465203501, "eval_loss": 0.1507822722196579, "eval_precision": 0.9214963846111387, "eval_recall": 0.9338153037252489, "eval_runtime": 89.1007, "eval_samples_per_second": 304.206, "eval_steps_per_second": 15.219, "step": 184348 }, { "epoch": 18.0, "learning_rate": 1.1000000000000001e-05, "loss": 0.0053, "step": 195192 }, { "epoch": 18.0, "eval_accuracy": 0.9827933858180969, "eval_f1": 0.927893329629501, "eval_loss": 0.15079163014888763, "eval_precision": 0.9216781866092739, "eval_recall": 0.934192862736324, "eval_runtime": 90.146, "eval_samples_per_second": 300.679, "eval_steps_per_second": 15.042, "step": 195192 }, { "epoch": 19.0, "learning_rate": 1.0500000000000001e-05, "loss": 0.0048, "step": 206036 }, { "epoch": 19.0, "eval_accuracy": 0.9825150648473366, "eval_f1": 0.9270733168889446, "eval_loss": 0.16124233603477478, "eval_precision": 0.9239187800338879, "eval_recall": 0.930249468620651, "eval_runtime": 89.3088, "eval_samples_per_second": 303.497, "eval_steps_per_second": 15.183, "step": 206036 }, { "epoch": 20.0, "learning_rate": 1e-05, "loss": 0.0043, "step": 216880 }, { "epoch": 20.0, "eval_accuracy": 0.9827440998128582, "eval_f1": 0.9272743748997957, "eval_loss": 0.1557987779378891, "eval_precision": 0.9244839808186809, "eval_recall": 0.9300816646157288, "eval_runtime": 88.3664, "eval_samples_per_second": 306.734, "eval_steps_per_second": 15.345, "step": 216880 }, { "epoch": 21.0, "learning_rate": 9.5e-06, "loss": 0.0041, "step": 227724 }, { "epoch": 21.0, "eval_accuracy": 0.9828412222349464, "eval_f1": 0.92726855170398, "eval_loss": 0.1576606184244156, "eval_precision": 0.9229714190715008, "eval_recall": 0.9316058843271059, "eval_runtime": 88.9473, "eval_samples_per_second": 304.731, "eval_steps_per_second": 15.245, "step": 227724 }, { "epoch": 22.0, "learning_rate": 9e-06, "loss": 0.0036, "step": 238568 }, { "epoch": 22.0, "eval_accuracy": 0.9831050473218129, "eval_f1": 0.9286320918900104, "eval_loss": 0.16860993206501007, "eval_precision": 0.9246028445479497, "eval_recall": 0.9326966103591006, "eval_runtime": 88.4212, "eval_samples_per_second": 306.544, "eval_steps_per_second": 15.336, "step": 238568 }, { "epoch": 23.0, "learning_rate": 8.5e-06, "loss": 0.0033, "step": 249412 }, { "epoch": 23.0, "eval_accuracy": 0.9828832602982384, "eval_f1": 0.9276945586288581, "eval_loss": 0.16733527183532715, "eval_precision": 0.9244355577772224, "eval_recall": 0.9309766193086475, "eval_runtime": 88.9341, "eval_samples_per_second": 304.776, "eval_steps_per_second": 15.247, "step": 249412 }, { "epoch": 24.0, "learning_rate": 8.000000000000001e-06, "loss": 0.0031, "step": 260256 }, { "epoch": 24.0, "eval_accuracy": 0.9835123816592278, "eval_f1": 0.9298053519496189, "eval_loss": 0.16969779133796692, "eval_precision": 0.9274573913043478, "eval_recall": 0.9321652310101801, "eval_runtime": 87.785, "eval_samples_per_second": 308.766, "eval_steps_per_second": 15.447, "step": 260256 }, { "epoch": 25.0, "learning_rate": 7.500000000000001e-06, "loss": 0.0029, "step": 271100 }, { "epoch": 25.0, "eval_accuracy": 0.983328283933777, "eval_f1": 0.9295600197618832, "eval_loss": 0.16894972324371338, "eval_precision": 0.9251374672779401, "eval_recall": 0.9340250587314017, "eval_runtime": 88.675, "eval_samples_per_second": 305.667, "eval_steps_per_second": 15.292, "step": 271100 }, { "epoch": 26.0, "learning_rate": 7e-06, "loss": 0.0026, "step": 281944 }, { "epoch": 26.0, "eval_accuracy": 0.9831079464985917, "eval_f1": 0.9281980972530569, "eval_loss": 0.1714058518409729, "eval_precision": 0.9239348804987877, "eval_recall": 0.9325008390200246, "eval_runtime": 89.606, "eval_samples_per_second": 302.491, "eval_steps_per_second": 15.133, "step": 281944 }, { "epoch": 27.0, "learning_rate": 6.5000000000000004e-06, "loss": 0.0022, "step": 292788 }, { "epoch": 27.0, "eval_accuracy": 0.9831311399128217, "eval_f1": 0.9291277150061997, "eval_loss": 0.16884349286556244, "eval_precision": 0.9257100024985425, "eval_recall": 0.9325707573554088, "eval_runtime": 88.5198, "eval_samples_per_second": 306.203, "eval_steps_per_second": 15.319, "step": 292788 }, { "epoch": 28.0, "learning_rate": 6e-06, "loss": 0.002, "step": 303632 }, { "epoch": 28.0, "eval_accuracy": 0.9833819187041839, "eval_f1": 0.9299846945874496, "eval_loss": 0.17876744270324707, "eval_precision": 0.9253752007531705, "eval_recall": 0.9346403400827833, "eval_runtime": 88.8069, "eval_samples_per_second": 305.213, "eval_steps_per_second": 15.269, "step": 303632 }, { "epoch": 29.0, "learning_rate": 5.500000000000001e-06, "loss": 0.0019, "step": 314476 }, { "epoch": 29.0, "eval_accuracy": 0.9836268991419886, "eval_f1": 0.9311766262342112, "eval_loss": 0.1778053343296051, "eval_precision": 0.9273667859421375, "eval_recall": 0.9350178990938584, "eval_runtime": 89.2258, "eval_samples_per_second": 303.78, "eval_steps_per_second": 15.197, "step": 314476 }, { "epoch": 30.0, "learning_rate": 5e-06, "loss": 0.0018, "step": 325320 }, { "epoch": 30.0, "eval_accuracy": 0.9834732427727146, "eval_f1": 0.9307018886832106, "eval_loss": 0.18135882914066315, "eval_precision": 0.9263569113124429, "eval_recall": 0.9350878174292426, "eval_runtime": 88.7746, "eval_samples_per_second": 305.324, "eval_steps_per_second": 15.275, "step": 325320 }, { "epoch": 31.0, "learning_rate": 4.5e-06, "loss": 0.0016, "step": 336164 }, { "epoch": 31.0, "eval_accuracy": 0.983370321997069, "eval_f1": 0.9299260330348701, "eval_loss": 0.18739104270935059, "eval_precision": 0.9246295067462951, "eval_recall": 0.9352835887683186, "eval_runtime": 89.214, "eval_samples_per_second": 303.82, "eval_steps_per_second": 15.199, "step": 336164 }, { "epoch": 32.0, "learning_rate": 4.000000000000001e-06, "loss": 0.0014, "step": 347008 }, { "epoch": 32.0, "eval_accuracy": 0.9836066049045373, "eval_f1": 0.9308149819620296, "eval_loss": 0.18249443173408508, "eval_precision": 0.927185947775897, "eval_recall": 0.9344725360778611, "eval_runtime": 89.9015, "eval_samples_per_second": 301.497, "eval_steps_per_second": 15.083, "step": 347008 }, { "epoch": 33.0, "learning_rate": 3.5e-06, "loss": 0.0015, "step": 357852 }, { "epoch": 33.0, "eval_accuracy": 0.9833732211738477, "eval_f1": 0.9303621946290954, "eval_loss": 0.18371780216693878, "eval_precision": 0.927196466764812, "eval_recall": 0.9335496140507887, "eval_runtime": 89.8085, "eval_samples_per_second": 301.809, "eval_steps_per_second": 15.099, "step": 357852 }, { "epoch": 34.0, "learning_rate": 3e-06, "loss": 0.0013, "step": 368696 }, { "epoch": 34.0, "eval_accuracy": 0.9834457005933165, "eval_f1": 0.9307165143748607, "eval_loss": 0.1884261518716812, "eval_precision": 0.9271006771006771, "eval_recall": 0.9343606667412462, "eval_runtime": 90.3295, "eval_samples_per_second": 300.068, "eval_steps_per_second": 15.012, "step": 368696 }, { "epoch": 35.0, "learning_rate": 2.5e-06, "loss": 0.0012, "step": 379540 }, { "epoch": 35.0, "eval_accuracy": 0.9836544413213868, "eval_f1": 0.9312036572448013, "eval_loss": 0.18962261080741882, "eval_precision": 0.9281507001555901, "eval_recall": 0.9342767647387851, "eval_runtime": 89.7245, "eval_samples_per_second": 302.092, "eval_steps_per_second": 15.113, "step": 379540 }, { "epoch": 36.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.0011, "step": 390384 }, { "epoch": 36.0, "eval_accuracy": 0.9836124032580948, "eval_f1": 0.9311286323238515, "eval_loss": 0.18686576187610626, "eval_precision": 0.9275881210102692, "eval_recall": 0.9346962747510907, "eval_runtime": 90.2423, "eval_samples_per_second": 300.358, "eval_steps_per_second": 15.026, "step": 390384 }, { "epoch": 37.0, "learning_rate": 1.5e-06, "loss": 0.001, "step": 401228 }, { "epoch": 37.0, "eval_accuracy": 0.9836442942026612, "eval_f1": 0.9311170842443909, "eval_loss": 0.18996329605579376, "eval_precision": 0.9273587261075817, "eval_recall": 0.9349060297572436, "eval_runtime": 88.7307, "eval_samples_per_second": 305.475, "eval_steps_per_second": 15.282, "step": 401228 }, { "epoch": 38.0, "learning_rate": 1.0000000000000002e-06, "loss": 0.001, "step": 412072 }, { "epoch": 38.0, "eval_accuracy": 0.9837356182711919, "eval_f1": 0.9316876434183994, "eval_loss": 0.19159720838069916, "eval_precision": 0.9266231431021605, "eval_recall": 0.9368078084796957, "eval_runtime": 88.7508, "eval_samples_per_second": 305.406, "eval_steps_per_second": 15.279, "step": 412072 }, { "epoch": 39.0, "learning_rate": 5.000000000000001e-07, "loss": 0.001, "step": 422916 }, { "epoch": 39.0, "eval_accuracy": 0.9837994001603245, "eval_f1": 0.9321045231167601, "eval_loss": 0.1949097365140915, "eval_precision": 0.927983367983368, "eval_recall": 0.9362624454636984, "eval_runtime": 88.5519, "eval_samples_per_second": 306.092, "eval_steps_per_second": 15.313, "step": 422916 }, { "epoch": 40.0, "learning_rate": 0.0, "loss": 0.0009, "step": 433760 }, { "epoch": 40.0, "eval_accuracy": 0.9839153672314747, "eval_f1": 0.9324748546966902, "eval_loss": 0.19464968144893646, "eval_precision": 0.9283328482530179, "eval_recall": 0.9366539881418503, "eval_runtime": 88.4748, "eval_samples_per_second": 306.358, "eval_steps_per_second": 15.326, "step": 433760 } ], "logging_steps": 500, "max_steps": 433760, "num_train_epochs": 40, "save_steps": 500, "total_flos": 1.1334359572090675e+18, "trial_name": null, "trial_params": null }